http://rest-term.com/archives/2999/ http://algorithm.joho.info/programming/python-numpy-sample-code/ Dort gibt es bereits gute Zusammenfassungen, die eigentlich ausreichen; ich schreibe mir hier trotzdem eigene Notizen, um mein Gedächtnis aufzufrischen. (Je nach Situation sind die Notizen stellenweise auch auf Englisch verfasst.)
Numpy
>>> import numpy as np
>>> x = np.array([1, 2, 3])
>>> x
array([1, 2, 3])
>>> y = np.array([[1, 2, 3], [4, 5, 6]])
>>> y
array([[1, 2, 3],
[4, 5, 6]])
>>> y.shape
(2, 3)
>>> m = np.arange(0, 30, 2)
>>> m
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])
>>> np.linspace(1, 4, 9)
array([ 1. , 1.375, 1.75 , 2.125, 2.5 , 2.875, 3.25 , 3.625, 4. ])
>>> m = np.arange(0, 30, 2)
>>> m.reshape(3, 5)
array([[ 0, 2, 4, 6, 8],
[10, 12, 14, 16, 18],
[20, 22, 24, 26, 28]])
>>> m
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])
>>> m = np.arange(0, 30, 2)
>>> m.resize(3, 3)
>>> m
array([[ 0, 2, 4],
[ 6, 8, 10],
[12, 14, 16]])
>>> np.ones((4, 3))
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
>>>
>>> np.ones((2, 3), int)
array([[1, 1, 1],
[1, 1, 1]])
>>> np.zeros((4, 3))
array([[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.]])
>>> np.eye(5)
array([[ 1., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0.],
[ 0., 0., 1., 0., 0.],
[ 0., 0., 0., 1., 0.],
[ 0., 0., 0., 0., 1.]])
>>> np.diag([[ 1, 3, 5], [ 7, 9, 11], [13, 15, 17]])
array([ 1, 9, 17])
>>> np.array([1, 2, 3] * 3)
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
>>> np.repeat([1, 2, 3], 3)
array([1, 1, 1, 2, 2, 2, 3, 3, 3])
>>> x = np.array([[1, 2, 3]])
>>> y = np.array([[4, 5, 6], [7, 8, 9]])
>>> np.vstack([x, y])
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>> x = np.array([[1, 2], [3, 4]])
>>> y = np.array([[5, 6, 7], [8, 9, 0]])
>>> np.hstack([x, y])
array([[1, 2, 5, 6, 7],
[3, 4, 8, 9, 0]])
>>> np.random.randint(0, 10, (4, 3))
array([[6, 7, 8],
[5, 4, 9],
[5, 4, 9],
[5, 9, 2]])
>>> np.random.randint(0, 10, (4, 3))
array([[5, 7, 5],
[8, 4, 3],
[2, 9, 6],
[7, 9, 5]])
>>> x = np.array([[1, 2, 3], [4, 5, 6]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> y = np.array([[7, 8, 9], [10, 11, 12]])
>>> y
array([[ 7, 8, 9],
[10, 11, 12]])
>>> x + y
array([[ 8, 10, 12],
[14, 16, 18]])
>>> x + x + y
array([[ 9, 12, 15],
[18, 21, 24]])
>>> x * y
array([[ 7, 16, 27],
[40, 55, 72]])
>>> x ** 2
array([[ 1, 4, 9],
[16, 25, 36]])
>>> x ** 3
array([[ 1, 8, 27],
[ 64, 125, 216]])
>>> x.dot(y)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: shapes (2,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)
>>>
>>> z = np.array([[1], [2], [3]])
>>> z
array([[1],
[2],
[3]])
>>> x.dot(z)
array([[14],
[32]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.T
array([[1, 4],
[2, 5],
[3, 6]])
>>> x.T.T
array([[1, 2, 3],
[4, 5, 6]])
>>>
>>> z
array([[1],
[2],
[3]])
>>> z.T
array([[1, 2, 3]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>>
>>> x.dtype
dtype('int64')
>>>
>>> x.astype('f')
array([[ 1., 2., 3.],
[ 4., 5., 6.]], dtype=float32)
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.max()
6
>>> np.max(x)
6
>>> x.min()
1
>>> np.min(x)
1
>>> x.sum()
21
>>> np.sum(x)
21
>>> x.mean()
3.5
>>> np.mean(x)
3.5
>>> np.average(x)
3.5
>>> x.std()
1.707825127659933
>>> np.std(x)
1.707825127659933
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.argmax()
5
>>> x.argmin()
0
>>>
>>> y = np.array([[1, 2, 3], [1, 2, 3]])
>>> y
array([[1, 2, 3],
[1, 2, 3]])
>>> y.argmax()
2
>>> y.argmin()
0
>>> s = np.arange(13) ** 2
>>> s
array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144])
>>> s[0]
0
>>> s[11]
121
>>> s[0:3]
array([0, 1, 4])
>>> s[0], s[11], s[0:3]
(0, 121, array([0, 1, 4]))
>>> s[-4:]
array([ 81, 100, 121, 144])
>>> s[-4:-1]
array([ 81, 100, 121])
>>> s[-4::-1]
array([81, 64, 49, 36, 25, 16, 9, 4, 1, 0])
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r[2, 2]
14
>>> r[3, 3:6]
array([21, 22, 23])
>>> r[3, 3:7]
array([21, 22, 23])
>>> r[:2, :-1]
array([[ 0, 1, 2, 3, 4],
[ 6, 7, 8, 9, 10]])
>>> r[:-1, ::2]
array([[ 0, 2, 4],
[ 6, 8, 10],
[12, 14, 16],
[18, 20, 22],
[24, 26, 28]])
>>> r[r > 30]
array([31, 32, 33, 34, 35])
>>> r[r > 20]
array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])
>>> r[r > 20] = 20
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 20, 20, 20],
[20, 20, 20, 20, 20, 20],
[20, 20, 20, 20, 20, 20]])
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r2 = r[2:4, 2:4]
>>> r2
array([[14, 15],
[20, 21]])
>>>
>>> r2[:] = -1
>>> r2
array([[-1, -1],
[-1, -1]])
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, -1, -1, 16, 17],
[18, 19, -1, -1, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
Mit r2 = r[2:4, 2:4] erhält r2 nur eine Referenz (View) auf den entsprechenden Ausschnitt von r; das Bearbeiten von r2 verändert daher auch r.
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r2 = r[2:4, 2:4].copy()
>>> r2
array([[14, 15],
[20, 21]])
>>>
>>> r2[:] = -1
>>> r2
array([[-1, -1],
[-1, -1]])
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
Mit r2 = r[2:4, 2:4].copy() erhält r2 ein neues, aus r kopiertes Array, sodass r2 und r getrennte Objekte sind. Das Bearbeiten von r2 wirkt sich nicht auf r aus.
>>> r = np.random.randint(0, 10, (4, 3))
>>> r
array([[1, 6, 3],
[3, 6, 0],
[4, 9, 3],
[5, 9, 3]])
>>>
>>> for row in r:
... print(row)
...
[1 6 3]
[3 6 0]
[4 9 3]
[5 9 3]
>>>
>>> for i, row in enumerate(r):
... print(i, ' : ', row)
...
0 : [1 6 3]
1 : [3 6 0]
2 : [4 9 3]
3 : [5 9 3]
>>> r
array([[1, 6, 3],
[3, 6, 0],
[4, 9, 3],
[5, 9, 3]])
>>> r2 = r ** 2
>>> r2
array([[ 1, 36, 9],
[ 9, 36, 0],
[16, 81, 9],
[25, 81, 9]])
>>> for x, y, z in zip(r, r2, r):
... print(x, y, z)
...
[1 6 3] [ 1 36 9] [1 6 3]
[3 6 0] [ 9 36 0] [3 6 0]
[4 9 3] [16 81 9] [4 9 3]
[5 9 3] [25 81 9] [5 9 3]
Pandas
Series
>>> s = pd.Series([168, 180, 174, 190, 170, 185, 179, 181, 175, 169, 182, 177, 180, 171])
>>>
>>> pd.cut(s, 3)
0 (167.978, 175.333]
1 (175.333, 182.667]
2 (167.978, 175.333]
3 (182.667, 190]
4 (167.978, 175.333]
5 (182.667, 190]
6 (175.333, 182.667]
7 (175.333, 182.667]
8 (167.978, 175.333]
9 (167.978, 175.333]
10 (175.333, 182.667]
11 (175.333, 182.667]
12 (175.333, 182.667]
13 (167.978, 175.333]
dtype: category
Categories (3, object): [(167.978, 175.333] < (175.333, 182.667] < (182.667, 190]]
>>>
>>> pd.cut(s, 3, labels=['Small', 'Medium', 'Large'])
0 Small
1 Medium
2 Small
3 Large
4 Small
5 Large
6 Medium
7 Medium
8 Small
9 Small
10 Medium
11 Medium
12 Medium
13 Small
dtype: category
Categories (3, object): [Small < Medium < Large]
Dataframe
Als Beispieldaten dient die Tabelle der Olympischen Spiele aller Zeiten (im Folgenden als DataFrame df).
>>> df[df['Gold'] == max(df['Gold'])].index[0]
'United States'
>>> df[(df['Gold'] > 0) & (df['Gold.1'] > 0)]
Als Beispieldaten werden die folgenden DataFrames verwendet:
>>> import pandas as pd
>>> staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},
... {'Name': 'Sally', 'Role': 'Course liasion'},
... {'Name': 'James', 'Role': 'Grader'}])
>>> staff_df = staff_df.set_index('Name')
>>> student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},
... {'Name': 'Mike', 'School': 'Law'},
... {'Name': 'Sally', 'School': 'Engineering'}])
>>> student_df = student_df.set_index('Name')
>>>
>>> staff_df
Role
Name
Kelly Director of HR
Sally Course liasion
James Grader
>>>
>>> student_df
School
Name
James Business
Mike Law
Sally Engineering
Daten aller Personen abrufen, die Student oder Mitarbeiter sind (Outer Join):
>>> pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)
Role School
Name
James Grader Business
Kelly Director of HR NaN
Mike NaN Law
Sally Course liasion Engineering
Daten aller Personen abrufen, die sowohl Student als auch Mitarbeiter sind (Inner Join):
>>> pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)
Role School
Name
James Grader Business
Sally Course liasion Engineering
Daten aller Mitarbeiter abrufen. Ist ein Mitarbeiter zugleich Student, wird auch dessen School-Angabe übernommen (Left Join):
>>> pd.merge(staff_df, student_df, how='left', left_index=True, right_index=True)
Role School
Name
Kelly Director of HR NaN
Sally Course liasion Engineering
James Grader Business
Daten aller Studenten abrufen. Ist ein Student zugleich Mitarbeiter, wird auch dessen Role-Angabe übernommen (Right Join):
>>> pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)
Role School
Name
James Grader Business
Mike NaN Law
Sally Course liasion Engineering
>>> products = pd.DataFrame([{'Product ID': 4109, 'Price': 5.0, 'Product': 'Suchi Roll'},
... {'Product ID': 1412, 'Price': 0.5, 'Product': 'Egg'},
... {'Product ID': 8931, 'Price': 1.5, 'Product': 'Bagel'}])
>>> products = products.set_index('Product ID')
>>> products
Price Product
Product ID
4109 5.0 Suchi Roll
1412 0.5 Egg
8931 1.5 Bagel
>>> invoices = pd.DataFrame([{'Customer': 'Ali', 'Product ID': 4109, 'Quantity': 1},
... {'Customer': 'Eric', 'Product ID': 1412, 'Quantity': 12},
... {'Customer': 'Anda', 'Product ID': 8931, 'Quantity': 6},
... {'Customer': 'Sam', 'Product ID': 4109, 'Quantity': 2}])
>>> invoices
Customer Product ID Quantity
0 Ali 4109 1
1 Eric 1412 12
2 Anda 8931 6
3 Sam 4109 2
>>>
>>> pd.merge(products, invoices, how='right', left_index=True, right_on='Product ID')
Price Product Customer Product ID Quantity
0 5.0 Suchi Roll Ali 4109 1
1 0.5 Egg Eric 1412 12
2 1.5 Bagel Anda 8931 6
3 5.0 Suchi Roll Sam 4109 2
>>> staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},
... {'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liasion'},
... {'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])
>>> student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},
... {'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},
... {'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])
>>> staff_df
First Name Last Name Role
0 Kelly Desjardins Director of HR
1 Sally Brooks Course liasion
2 James Wilde Grader
>>> student_df
First Name Last Name School
0 James Hammond Business
1 Mike Smith Law
2 Sally Brooks Engineering
>>> pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])
First Name Last Name Role School
0 Sally Brooks Course liasion Engineering
>>> df.groupby('A').agg('sum')
>>> df.groupby('A').agg({'B': sum})
Recommended Posts