Good summaries already exist at http://rest-term.com/archives/2999/ and http://algorithm.joho.info/programming/python-numpy-sample-code/, so reading those would be enough, but I am writing these notes for myself to help the material stick in my memory. (For various reasons, some rough English is included as well.)
Numpy
>>> import numpy as np
>>> x = np.array([1, 2, 3])
>>> x
array([1, 2, 3])
>>> y = np.array([[1, 2, 3], [4, 5, 6]])
>>> y
array([[1, 2, 3],
[4, 5, 6]])
>>> y.shape
(2, 3)
>>> m = np.arange(0, 30, 2)
>>> m
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])
>>> np.linspace(1, 4, 9)
array([ 1. , 1.375, 1.75 , 2.125, 2.5 , 2.875, 3.25 , 3.625, 4. ])
>>> m = np.arange(0, 30, 2)
>>> m.reshape(3, 5)
array([[ 0, 2, 4, 6, 8],
[10, 12, 14, 16, 18],
[20, 22, 24, 26, 28]])
>>> m
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])
>>> m = np.arange(0, 30, 2)
>>> m.resize(3, 3)
>>> m
array([[ 0, 2, 4],
[ 6, 8, 10],
[12, 14, 16]])
>>> np.ones((4, 3))
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
>>>
>>> np.ones((2, 3), int)
array([[1, 1, 1],
[1, 1, 1]])
>>> np.zeros((4, 3))
array([[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.]])
>>> np.eye(5)
array([[ 1., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0.],
[ 0., 0., 1., 0., 0.],
[ 0., 0., 0., 1., 0.],
[ 0., 0., 0., 0., 1.]])
>>> np.diag([[ 1, 3, 5], [ 7, 9, 11], [13, 15, 17]])
array([ 1, 9, 17])
>>> np.array([1, 2, 3] * 3)
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
>>> np.repeat([1, 2, 3], 3)
array([1, 1, 1, 2, 2, 2, 3, 3, 3])
>>> x = np.array([[1, 2, 3]])
>>> y = np.array([[4, 5, 6], [7, 8, 9]])
>>> np.vstack([x, y])
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>> x = np.array([[1, 2], [3, 4]])
>>> y = np.array([[5, 6, 7], [8, 9, 0]])
>>> np.hstack([x, y])
array([[1, 2, 5, 6, 7],
[3, 4, 8, 9, 0]])
>>> np.random.randint(0, 10, (4, 3))
array([[6, 7, 8],
[5, 4, 9],
[5, 4, 9],
[5, 9, 2]])
>>> np.random.randint(0, 10, (4, 3))
array([[5, 7, 5],
[8, 4, 3],
[2, 9, 6],
[7, 9, 5]])
>>> x = np.array([[1, 2, 3], [4, 5, 6]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> y = np.array([[7, 8, 9], [10, 11, 12]])
>>> y
array([[ 7, 8, 9],
[10, 11, 12]])
>>> x + y
array([[ 8, 10, 12],
[14, 16, 18]])
>>> x + x + y
array([[ 9, 12, 15],
[18, 21, 24]])
>>> x * y
array([[ 7, 16, 27],
[40, 55, 72]])
>>> x ** 2
array([[ 1, 4, 9],
[16, 25, 36]])
>>> x ** 3
array([[ 1, 8, 27],
[ 64, 125, 216]])
>>> x.dot(y)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: shapes (2,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)
>>>
>>> z = np.array([[1], [2], [3]])
>>> z
array([[1],
[2],
[3]])
>>> x.dot(z)
array([[14],
[32]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.T
array([[1, 4],
[2, 5],
[3, 6]])
>>> x.T.T
array([[1, 2, 3],
[4, 5, 6]])
>>>
>>> z
array([[1],
[2],
[3]])
>>> z.T
array([[1, 2, 3]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>>
>>> x.dtype
dtype('int64')
>>>
>>> x.astype('f')
array([[ 1., 2., 3.],
[ 4., 5., 6.]], dtype=float32)
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.max()
6
>>> np.max(x)
6
>>> x.min()
1
>>> np.min(x)
1
>>> x.sum()
21
>>> np.sum(x)
21
>>> x.mean()
3.5
>>> np.mean(x)
3.5
>>> np.average(x)
3.5
>>> x.std()
1.707825127659933
>>> np.std(x)
1.707825127659933
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.argmax()
5
>>> x.argmin()
0
>>>
>>> y = np.array([[1, 2, 3], [1, 2, 3]])
>>> y
array([[1, 2, 3],
[1, 2, 3]])
>>> y.argmax()
2
>>> y.argmin()
0
>>> s = np.arange(13) ** 2
>>> s
array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144])
>>> s[0]
0
>>> s[11]
121
>>> s[0:3]
array([0, 1, 4])
>>> s[0], s[11], s[0:3]
(0, 121, array([0, 1, 4]))
>>> s[-4:]
array([ 81, 100, 121, 144])
>>> s[-4:-1]
array([ 81, 100, 121])
>>> s[-4::-1]
array([81, 64, 49, 36, 25, 16, 9, 4, 1, 0])
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r[2, 2]
14
>>> r[3, 3:6]
array([21, 22, 23])
>>> r[3, 3:7]
array([21, 22, 23])
>>> r[:2, :-1]
array([[ 0, 1, 2, 3, 4],
[ 6, 7, 8, 9, 10]])
>>> r[:-1, ::2]
array([[ 0, 2, 4],
[ 6, 8, 10],
[12, 14, 16],
[18, 20, 22],
[24, 26, 28]])
>>> r[r > 30]
array([31, 32, 33, 34, 35])
>>> r[r > 20]
array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])
>>> r[r > 20] = 20
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 20, 20, 20],
[20, 20, 20, 20, 20, 20],
[20, 20, 20, 20, 20, 20]])
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r2 = r[2:4, 2:4]
>>> r2
array([[14, 15],
[20, 21]])
>>>
>>> r2[:] = -1
>>> r2
array([[-1, -1],
[-1, -1]])
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, -1, -1, 16, 17],
[18, 19, -1, -1, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
With r2 = r[2:4, 2:4], r2 refers to the same data as r (no copy is made), so editing r2 also edits r.
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r2 = r[2:4, 2:4].copy()
>>> r2
array([[14, 15],
[20, 21]])
>>>
>>> r2[:] = -1
>>> r2
array([[-1, -1],
[-1, -1]])
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
With r2 = r[2:4, 2:4].copy(), a new array copied from r is assigned to r2, so r2 and r are separate objects. Editing r2 does not affect r.
>>> r = np.random.randint(0, 10, (4, 3))
>>> r
array([[1, 6, 3],
[3, 6, 0],
[4, 9, 3],
[5, 9, 3]])
>>>
>>> for row in r:
... print(row)
...
[1 6 3]
[3 6 0]
[4 9 3]
[5 9 3]
>>>
>>> for i, row in enumerate(r):
... print(i, ' : ', row)
...
0 : [1 6 3]
1 : [3 6 0]
2 : [4 9 3]
3 : [5 9 3]
>>> r
array([[1, 6, 3],
[3, 6, 0],
[4, 9, 3],
[5, 9, 3]])
>>> r2 = r ** 2
>>> r2
array([[ 1, 36, 9],
[ 9, 36, 0],
[16, 81, 9],
[25, 81, 9]])
>>> for x, y, z in zip(r, r2, r):
... print(x, y, z)
...
[1 6 3] [ 1 36 9] [1 6 3]
[3 6 0] [ 9 36 0] [3 6 0]
[4 9 3] [16 81 9] [4 9 3]
[5 9 3] [25 81 9] [5 9 3]
Pandas
Series
>>> s = pd.Series([168, 180, 174, 190, 170, 185, 179, 181, 175, 169, 182, 177, 180, 171])
>>>
>>> pd.cut(s, 3)
0 (167.978, 175.333]
1 (175.333, 182.667]
2 (167.978, 175.333]
3 (182.667, 190]
4 (167.978, 175.333]
5 (182.667, 190]
6 (175.333, 182.667]
7 (175.333, 182.667]
8 (167.978, 175.333]
9 (167.978, 175.333]
10 (175.333, 182.667]
11 (175.333, 182.667]
12 (175.333, 182.667]
13 (167.978, 175.333]
dtype: category
Categories (3, object): [(167.978, 175.333] < (175.333, 182.667] < (182.667, 190]]
>>>
>>> pd.cut(s, 3, labels=['Small', 'Medium', 'Large'])
0 Small
1 Medium
2 Small
3 Large
4 Small
5 Large
6 Medium
7 Medium
8 Small
9 Small
10 Medium
11 Medium
12 Medium
13 Small
dtype: category
Categories (3, object): [Small < Medium < Large]
DataFrame
The all-time Olympic Games medal table is used as sample data.
>>> df[df['Gold'] == max(df['Gold'])].index[0]
'United States'
>>> df[(df['Gold'] > 0) & (df['Gold.1'] > 0)]
The following is used as sample data. / The sample data is as follows:
>>> import pandas as pd
>>> staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},
... {'Name': 'Sally', 'Role': 'Course liasion'},
... {'Name': 'James', 'Role': 'Grader'}])
>>> staff_df = staff_df.set_index('Name')
>>> student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},
... {'Name': 'Mike', 'School': 'Law'},
... {'Name': 'Sally', 'School': 'Engineering'}])
>>> student_df = student_df.set_index('Name')
>>>
>>> staff_df
Role
Name
Kelly Director of HR
Sally Course liasion
James Grader
>>>
>>> student_df
School
Name
James Business
Mike Law
Sally Engineering
Get the data of everyone who is a student or a staff member.
>>> pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)
Role School
Name
James Grader Business
Kelly Director of HR NaN
Mike NaN Law
Sally Course liasion Engineering
Get the data of everyone who is both a student and a staff member.
>>> pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)
Role School
Name
James Grader Business
Sally Course liasion Engineering
Get staff data. If a staff member is also a student, get the School data as well. / Get the data of everyone who is a staff member. If a staff member is also a student, get their school data too.
>>> pd.merge(staff_df, student_df, how='left', left_index=True, right_index=True)
Role School
Name
Kelly Director of HR NaN
Sally Course liasion Engineering
James Grader Business
Get student data. If a student is also a staff member, get the Role data as well. / Get the data of everyone who is a student. If a student is also a staff member, get their role data too.
>>> pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)
Role School
Name
James Grader Business
Mike NaN Law
Sally Course liasion Engineering
>>> products = pd.DataFrame([{'Product ID': 4109, 'Price': 5.0, 'Product': 'Suchi Roll'},
... {'Product ID': 1412, 'Price': 0.5, 'Product': 'Egg'},
... {'Product ID': 8931, 'Price': 1.5, 'Product': 'Bagel'}])
>>> products = products.set_index('Product ID')
>>> products
Price Product
Product ID
4109 5.0 Suchi Roll
1412 0.5 Egg
8931 1.5 Bagel
>>> invoices = pd.DataFrame([{'Customer': 'Ali', 'Product ID': 4109, 'Quantity': 1},
... {'Customer': 'Eric', 'Product ID': 1412, 'Quantity': 12},
... {'Customer': 'Anda', 'Product ID': 8931, 'Quantity': 6},
... {'Customer': 'Sam', 'Product ID': 4109, 'Quantity': 2}])
>>> invoices
Customer Product ID Quantity
0 Ali 4109 1
1 Eric 1412 12
2 Anda 8931 6
3 Sam 4109 2
>>>
>>> pd.merge(products, invoices, how='right', left_index=True, right_on='Product ID')
Price Product Customer Product ID Quantity
0 5.0 Suchi Roll Ali 4109 1
1 0.5 Egg Eric 1412 12
2 1.5 Bagel Anda 8931 6
3 5.0 Suchi Roll Sam 4109 2
>>> staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},
... {'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liasion'},
... {'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])
>>> student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},
... {'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},
... {'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])
>>> staff_df
First Name Last Name Role
0 Kelly Desjardins Director of HR
1 Sally Brooks Course liasion
2 James Wilde Grader
>>> student_df
First Name Last Name School
0 James Hammond Business
1 Mike Smith Law
2 Sally Brooks Engineering
>>> pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])
First Name Last Name Role School
0 Sally Brooks Course liasion Engineering
>>> df.groupby('A').agg('sum')
>>> df.groupby('A').agg({'B': sum})
Recommended Posts