[PYTHON] NumPy basics

Features of NumPy

--Performs calculations on whole multidimensional arrays at once, at high speed.

ndarray: Multidimensional array object

--Example of ndarray

import numpy as np

# Create a small random array and show element-wise arithmetic and basic attributes.
# The printed values below are from one sample run; random output differs on every run.
data = np.random.randn(2,3) # random numbers in 2 rows and 3 columns

print(data)
# [[-0.4440664  -0.07889544 -0.84781375]
#  [ 0.59333292 -0.03008522  1.54106015]]

print(data * 10) # multiply every element by 10
# [[-4.44066398 -0.78895438 -8.47813747]
#  [ 5.93332925 -0.3008522  15.41060155]]

print(data + data) # element-wise addition
# [[-0.8881328  -0.15779088 -1.69562749]
#  [ 1.18666585 -0.06017044  3.08212031]]

print(data.shape) # number of elements along each axis: (rows, columns)
# (2, 3)

print(data.dtype) # data type of the elements
# float64

Generation of ndarray

--Generate an ndarray from a list with np.array(list). --np.zeros(10) and np.zeros((3, 6)) create ndarrays whose elements are all 0. --Similarly, np.ones() creates an ndarray whose elements are all 1. --np.full() creates an ndarray filled with the specified value. --np.arange(10) generates an ndarray containing the integers 0 to 9 in order.


import numpy as np

# Build a two-dimensional ndarray from a nested Python list and inspect it.
data = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]

# Each inner list becomes one row of the array.
arr = np.array(data)

print(arr)
# [[1 2 3 4]
#  [5 6 7 8]]

# Number of dimensions (axes).
print(arr.ndim)
# 2

# Number of elements along each axis: (rows, columns).
print(arr.shape)
# (2, 4)

ndarray data type

import numpy as np

# Demonstrate converting an ndarray to another data type with astype().
arr1 = np.array([-3.7, -1.2, 0.5, 4.5])
print(arr1)
# [-3.7 -1.2  0.5  4.5]

arr2 = arr1.astype(np.int32)  # cast float -> int32; the fractional part is dropped
print(arr2)
# [-3 -1  0  4]

# np.bytes_ is the byte-string dtype. The older alias np.string_ was removed in
# NumPy 2.0, so np.bytes_ is used here to work on both old and new versions.
arr3 = np.array(['-3.7', '-1.2', '0.5', '4.5'], dtype=np.bytes_)
print(arr3)
# [b'-3.7' b'-1.2' b'0.5' b'4.5']
print(arr3.dtype)
# |S4

arr4 = arr3.astype(np.float64)  # cast byte strings that contain numbers to float64
print(arr4)
# [-3.7 -1.2  0.5  4.5]

Arithmetic operation of ndarray

--Operations between ndarrays of the same shape are applied element by element, between the elements at the same position. --Operations between ndarrays of different shapes are handled by a mechanism called broadcasting.

import numpy as np

# Element-wise arithmetic and comparison between arrays of the same shape.
arr1 = np.array([
    [1.0, 2.0, 3.0, 4.0],
    [5.0, 6.0, 7.0, 8.0],
])
arr2 = np.array([
    [0.0, 4.0, 1.0, 5.0],
    [3.0, 9.0, 4.0, 9.0],
])

# Square every element.
print(arr1 ** 2)
# [[ 1.  4.  9. 16.]
#  [25. 36. 49. 64.]]

# Subtract the array from itself, element by element.
print(arr1 - arr1)
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]]

# A scalar operand is applied to every element.
print(1 / arr1)
# [[1.         0.5        0.33333333 0.25      ]
#  [0.2        0.16666667 0.14285714 0.125     ]]

# Comparing two arrays yields a boolean array of the same shape.
print(arr1 < arr2)
# [[False  True False  True]
#  [False  True False  True]]

Index reference and slicing basics

--An index reference (slice) cuts out part of the data. --You can assign a scalar to a slice, and the value is propagated to every element of the slice (broadcasting). --Slices are views, not copies, so modifying a slice modifies the original array.

import numpy as np

# Demonstrate that a slice is a view: writing through the slice changes the original array.
arr1 = np.arange(10)
print(arr1)
# [0 1 2 3 4 5 6 7 8 9]

print(arr1[5:8])
# [5 6 7]

arr1[5:8] = 12  # the scalar is assigned to every element of the slice
print(arr1)
# [ 0  1  2  3  4 12 12 12  8  9]

arr_slice = arr1[5:8]  # a view onto arr1, not a copy
arr_slice[1] = 12345

print(arr_slice)
# [   12 12345    12]

print(arr1)  # the change made through arr_slice is visible in arr1
# [    0     1     2     3     4    12 12345    12     8     9]

--Two-dimensional slice

# Slice a two-dimensional array along rows and columns, then assign to a sliced region.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])  # 3 x 3 two-dimensional array
print(arr2d)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

print(arr2d[:2])  # rows 0 and 1
# [[1 2 3]
#  [4 5 6]]

print(arr2d[:2, 1:])  # rows 0 and 1, columns 1 and 2
# [[2 3]
#  [5 6]]

print(arr2d[1, :2])  # row 1, columns 0 and 1
# [4 5]

print(arr2d[:, :1])  # all rows, column 0 only (the result is still two-dimensional)
# [[1]
#  [4]
#  [7]]

arr2d[:2, 1:] = 0 # assign 0 to every element in that region
print(arr2d)
# [[1 0 0]
#  [4 0 0]
#  [7 8 9]]

Boolean index reference

--Using a boolean array with 7 elements as the condition, the matching rows of a two-dimensional array of shape (7, 4) can be extracted. --The length of the boolean array must match the number of elements along the axis of the array being indexed.

import numpy as np

# Select and overwrite rows of a 2-D array using boolean masks built from a
# parallel 1-D array of names (one name per row of data).
# The printed values below are from one sample run; random output differs on every run.
names = np.array(['Yamada', 'Suzuki', 'Sato', 'Yamada', 'Tanaka', 'Tanaka', 'Sato'])

data = np.random.randn(7, 4)  # 7 rows x 4 columns of random numbers
print(data)
# [[-0.92866442 -0.81744986  1.11821763 -0.55440628]
#  [-0.09511771  0.99145963  0.38475434  0.59748055]
#  [ 0.0444708  -0.00381292  0.97888419  1.242504  ]
#  [ 0.89214068 -1.0411466   0.90850611 -2.02933442]
#  [ 0.78789041 -0.84593788 -0.5624772   0.32488453]
#  [ 0.50153002 -0.25411512  0.30855623 -1.31825153]
#  [-0.6596584   1.53735231 -0.37044833  1.93782111]]

print(names == 'Yamada')  # True at indices 0 and 3, where the name is 'Yamada'
# [ True False False  True False False False]

print(data[names == 'Yamada'])  # extract rows 0 and 3 of data
# [[-0.92866442 -0.81744986  1.11821763 -0.55440628]
#  [ 0.89214068 -1.0411466   0.90850611 -2.02933442]]

# Conditions can be combined with | (or); reuse the mask instead of recomputing it.
mask = (names == 'Yamada') | (names == 'Sato')
print(data[mask])
# [[-0.92866442 -0.81744986  1.11821763 -0.55440628]
#  [ 0.0444708  -0.00381292  0.97888419  1.242504  ]
#  [ 0.89214068 -1.0411466   0.90850611 -2.02933442]
#  [-0.6596584   1.53735231 -0.37044833  1.93782111]]

data[names == 'Yamada'] = 0  # set every element of rows 0 and 3 to 0
print(data)
# [[ 0.          0.          0.          0.        ]
#  [-0.09511771  0.99145963  0.38475434  0.59748055]
#  [ 0.0444708  -0.00381292  0.97888419  1.242504  ]
#  [ 0.          0.          0.          0.        ]
#  [ 0.78789041 -0.84593788 -0.5624772   0.32488453]
#  [ 0.50153002 -0.25411512  0.30855623 -1.31825153]
#  [-0.6596584   1.53735231 -0.37044833  1.93782111]]

Fancy indexing

--Fancy index reference is a method that uses an integer array for index reference. --Unlike slicing, fancy index references always return a copy of the original data

import numpy as np

# Fancy indexing: select elements of an array with lists of integer indices.
arr = np.arange(32).reshape((8, 4))  # 8 rows x 4 columns holding 0..31
print(arr)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]
#  [12 13 14 15]
#  [16 17 18 19]
#  [20 21 22 23]
#  [24 25 26 27]
#  [28 29 30 31]]

# A single index list picks whole rows in the listed order (-1 is the last row).
row_picks = [0, 4, 5, -1]
print(arr[row_picks])
# [[ 0  1  2  3]
#  [16 17 18 19]
#  [20 21 22 23]
#  [28 29 30 31]]

# Two index lists are paired element by element, selecting the elements at
# (1, 0), (5, 3), (7, 1) and (2, 2).
row_idx = [1, 5, 7, 2]
col_idx = [0, 3, 1, 2]
print(arr[row_idx, col_idx])
# [ 4 23 29 10]

Transpose matrix, swapping rows and columns

--Transposing an ndarray returns a special view of the original data with its axes rearranged; it does not make a copy. --There are two ways to transpose: call the transpose method, or refer to T, which is one of the attributes of ndarray.

import numpy as np

# Transpose a 2-D array with .T, and reorder the axes of a 3-D array with transpose().
arr1 = np.arange(15).reshape((3, 5))  # 3 rows x 5 columns
print(arr1)
# [[ 0  1  2  3  4]
#  [ 5  6  7  8  9]
#  [10 11 12 13 14]]

# .T swaps rows and columns, giving a 5 x 3 array.
print(arr1.T)
# [[ 0  5 10]
#  [ 1  6 11]
#  [ 2  7 12]
#  [ 3  8 13]
#  [ 4  9 14]]

arr2 = np.arange(24).reshape(2, 3, 4)  # three-dimensional array of shape (2, 3, 4)
print(arr2)
# [[[ 0  1  2  3]
#   [ 4  5  6  7]
#   [ 8  9 10 11]]
#
#  [[12 13 14 15]
#   [16 17 18 19]
#   [20 21 22 23]]]

# Swap the first two axes: shape (2, 3, 4) becomes (3, 2, 4).
print(arr2.transpose(1, 0, 2))
# [[[ 0  1  2  3]
#   [12 13 14 15]]
#
#  [[ 4  5  6  7]
#   [16 17 18 19]]
#
#  [[ 8  9 10 11]
#   [20 21 22 23]]]

Universal function: Function application to all array elements

--A universal function (ufunc) is a function that operates on an ndarray element by element and returns the result for each element. --Unary ufuncs take one ndarray: abs, fabs, sqrt, square, exp, log, log10, log2, log1p, sign, ceil, floor, rint, modf, isnan, isfinite, isinf, cos, sin, tan, etc. --Binary ufuncs take two ndarrays: add, subtract, multiply, divide, floor_divide, power, maximum, fmax, minimum, fmin, mod, copysign, greater, less, equal, logical_and, etc.

Array-oriented programming with ndarray

--Display the result of sqrt(x^2 + y^2) for grid point data. --np.meshgrid takes two one-dimensional arrays and returns two two-dimensional arrays that together enumerate all combinations of their elements.

import numpy as np
import matplotlib.pyplot as plt

# Evaluate sqrt(x^2 + y^2) at every point of a 2-D grid and show it as a grayscale image.
points = np.arange(-5, 5, 0.01)  # 1000 grid points from -5 up to (not including) 5
xs, ys = np.meshgrid(points, points)  # coordinate arrays covering every (x, y) combination

z = np.sqrt(xs ** 2 + ys ** 2)

print(z)

plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# The title mirrors the expression computed for z above.
plt.title("Image plot of $\\sqrt{x^2 + y^2}$ for a grid of values")
plt.show()
スクリーンショット 2020-01-27 9.07.30.png

Representation of conditional control in ndarray

--np.where() returns the second argument where the first argument (the condition) is True, and the third argument otherwise. --The second and third arguments can each be an array or a scalar value.

import numpy as np
import matplotlib.pyplot as plt

# Choose between two arrays element by element according to a boolean condition.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
# Take the value from xarr where cond is True, otherwise from yarr.
# Version 1: list comprehension (slow)
result1 = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
# Version 2: np.where
result2 = np.where(cond, xarr, yarr)
print(result2)
# [1.1 2.2 1.3 1.4 2.5]

Mathematical functions, statistical functions

--Specify which axis to process with `axis`. --`arr.sum()` and `arr.mean()` give the overall sum and average. --For a two-dimensional array, `arr.sum(axis=1)` gives the sum of each row and `arr.mean(axis=0)` gives the average of each column.

Boolean array function

--When `arr` is an np.array, `(arr > 0).sum()` is the number of positive elements (the number of True values). --When `bools` is a boolean array: `bools.any()` is True if at least one element is True, and `bools.all()` is True only if every element is True.

sort

--When `arr` is an np.array, `arr.sort()` sorts the array itself in place (destructive). --For multidimensional arrays, specify the axis to sort along, e.g. `arr.sort(1)`.

Set function: unique, etc.

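--np.unique(arr) removes duplicates and returns the sorted result. --np.in1d(arr, [2, 3, 6]) returns a boolean array of the same length as arr that is True where the element of arr is one of 2, 3, 6, and False otherwise. (In recent NumPy versions, np.isin is the recommended replacement for np.in1d.)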

ndarray file input / output

--pandas is usually used to read text files and tabular data, so here we focus on NumPy's binary format. --To save `arr`, use np.save('some_array', arr); it is saved uncompressed and the extension .npy is added automatically. --Read it back with `arr = np.load('some_array.npy')`. --Save multiple arrays uncompressed with np.savez('array_archive.npz', a=arr1, b=arr2); `a` and `b` become the keys of a dictionary-like object. --Read with `arch = np.load('array_archive.npz')` (there is no loadz). --Extract `arr1` with `arch['a']` and `arr2` with `arch['b']`. --Compress and save with np.savez_compressed('arrays_compressed.npz', a=arr1, b=arr2). Reading is the same as above.

Matrix calculation

--Use dot to calculate the inner product (matrix product).


import numpy as np

# Matrix product of a (2, 3) array and a (3, 2) array, written three equivalent ways.
x = np.array([
    [1.0, 2.0, 3.0],
    [4.0, 5.0, 6.0],
])
y = np.array([
    [6.0, 23.0],
    [-1.0, 7.0],
    [8.0, 9.0],
])

# 1) The dot method of the array.
print(x.dot(y))
# [[ 28.  64.]
#  [ 67. 181.]]

# 2) The np.dot function.
print(np.dot(x, y))
# [[ 28.  64.]
#  [ 67. 181.]]

# 3) The @ operator.
print(x @ y)
# [[ 28.  64.]
#  [ 67. 181.]]

Pseudo-random number generation

--np.random.normal(size=(4, 4)) generates a 4x4 matrix of random numbers drawn from a normal distribution. --randint returns random integers within the given integer range.

import numpy as np
import matplotlib.pyplot as plt

# Draw a 4 x 4 array of samples from a normal distribution.
# The printed values below are from one sample run; random output differs on every run.
samples = np.random.normal(size=(4,4))
print(samples)
# [[ 1.45907882  1.78873804 -0.52480754  0.20770224]
#  [-1.55474475 -1.67045483 -1.3589208   1.25584424]
#  [ 0.90562937 -1.50742692  1.48579887  1.48081589]
#  [ 1.34785606 -0.20653648  0.13308665 -0.24455952]]

Example: Random walk

Multiple random walk

--Simulate 5000 random walks at once, and for each walk that reaches 30 or -30, find the index of the step at which it first did so.

import numpy as np
import matplotlib.pyplot as plt

# Simulate many random walks at once (one walk per row, one step per column),
# find when each walk first reaches a distance of 30 from the origin, and plot
# the walks that contain the overall maximum and minimum positions.
# The printed values below are from one sample run; random output differs on every run.
nwalks = 5000
nsteps = 1000

# Randomly generate 0 or 1 for every step of every walk
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
print(draws)
# [[1 1 1 ... 0 1 1]
#  [1 1 0 ... 0 0 1]
#  [0 0 1 ... 1 1 0]
#  ...
#  [0 0 1 ... 0 0 0]
#  [0 0 1 ... 1 0 0]
#  [1 0 1 ... 1 1 0]]

# Map the draws to steps: 0 -> -1 and 1 -> +1
steps = np.where(draws > 0, 1, -1)
print(steps)
# [[ 1  1  1 ... -1  1  1]
#  [ 1  1 -1 ... -1 -1  1]
#  [-1 -1  1 ...  1  1 -1]
#  ...
#  [-1 -1  1 ... -1 -1 -1]
#  [-1 -1  1 ...  1 -1 -1]
#  [ 1 -1  1 ...  1  1 -1]]


# Cumulative sum along axis 1 (across the steps) gives each walk's position after every step
walks = steps.cumsum(1)
print(walks)
# [[  1   2   3 ...  10  11  12]
#  [  1   2   1 ... -44 -45 -44]
#  [ -1  -2  -1 ... -28 -27 -28]
#  ...
#  [ -1  -2  -1 ...   6   5   4]
#  [ -1  -2  -1 ...  -6  -7  -8]
#  [  1   0   1 ...  28  29  28]]

print(walks.max())
# 128

print(walks.min())
# -123

# For each walk (row): did it ever reach 30 or -30? One True/False per row
hits30 = (np.abs(walks) >= 30).any(1)
print(hits30)
# [False False  True ...  True  True  True]

# Number of walks that reached 30 or -30
print(hits30.sum())
# 3377

# Keep only the walks that reached 30 or -30 and find the first step index at
# which that happened (argmax on a boolean array returns the first True)
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
print(crossing_times)
# [671 313 161 ... 307 289  89]

# Average first-crossing step index
print(crossing_times.mean())
# 500.09327805744744

# Plot the first walk that contains the overall maximum and the first that contains the overall minimum
max_row = walks[(walks == walks.max()).any(1)][0]
min_row = walks[(walks == walks.min()).any(1)][0]

plt.plot(max_row)
plt.plot(min_row)
plt.show()

--Maximum and minimum graph of random walk

スクリーンショット 2020-01-30 12.53.18.png

reference

--Introduction to Data Analysis with Python 2nd Edition

Recommended Posts

NumPy basics
#Python basics (#Numpy 1/2)
#Python basics (#Numpy 2/2)
Python #Numpy basics
Python basics 8 numpy test
Python basics ⑤
Numpy [Basic]
numpy part 1
Linux basics
Python basics
Python basics ④
Numpy Memorandum_Matrix
numpy tips
Pandas basics
Git basics
Python basics ③
Python basics
Django basics
About numpy
Linux basics
NumPy axis
Pandas basics
Use Numpy
Python basics
numpy part 2
Python basics
Python basics ③
Python basics ②
Python basics ②
Speeding up numerical calculations with NumPy: Basics
Python basics: list
Python basics memorandum
Shell script basics # 2
#Python basics (#matplotlib)
Python CGI basics
Python basics: dictionary
numpy unit test
NumPy array manipulation (3)
list and numpy
Basics of Python ①
NumPy universal functions
Basics of python ①
numpy memorandum 1 / np.pad
Python slice basics
#Python basics (scope)
Go class basics
Unsupervised learning 1 Basics
#Python basics (functions)
numpy index search
NumPy array manipulation (1)
Python array basics
[Numpy] Shuffle ndarray
Python profiling basics
Linux command basics
numpy non-basic techniques
About Numpy broadcast
[PyTorch] Sample ① ~ NUMPY ~
Python basics: functions
#Python basics (class)
Install Numpy + atlas
Python basics summary