[PYTHON] Code convention, method correspondence table

#python

--sql

[]( "Order to display on this page" python pandas numpy matplotlib Other sql R )

[]( How to write this page

# # # # Heading: Example "Code convention", "Method correspondence table"

# # Heading: "Extract Columns", "Extract Some Records"

‘‘‘ python code comment ・ Write all comments here, not in the text )

Code conventions_rules for each language

python

python_ code convention
#python
df_used_method1_used_method2\
    = df.copy()\
        .used_method1()\
        .used_method2()\
How to write .py
1.Description of the function itself
"""Write a description here"""

2.Library import
import pandas as pd

3.Variable specification
START_DATE = 20200527

4.main()Definition of the function used in
def func1(parameter1:str) -> pd.DataFrame:

5.main()Definition of
def main()

6.
if __name__ == "main":
    main()
How to write a function
def func1(parameter_path:str, parameter_date:int, parameter_df:pd.DataFrame) -> pd.DataFrame:
    """
Write a docstring
    """

How to write argparse
#main()It seems necessary when taking an argument in
How to write logging

SQL

--sql
select
    datamart1.column1
    , datamart1.column2
    , datamart2.column3
from
    datamart1
        left join
    datamart2
        on = column2
where
    datamart1.column1 > 10
;

select
    column1
    , column2
    , sum(column3)
    , avg(column4)
from
    datamart1
group by
    column1
    , column2
;

SQL rules

Method correspondence table_data analysis related

Data understanding / data preparation of table data

Verification of whether the key is unique
#python

--sql

Verification of duplicate records
#python

--sql

Extraction of columns
#python

#Extracted in series format
df.loc[:,'column']

df['column']

df.column

#Extract in data frame format
df.loc[:,['column1','column2']]

df[['column1','column2']]
--sql
select
    column1
from
    datamart
;

select
    *
from
    datamart
;
Extract some records
#python
df.head(10)
--sql
select top(10)
    column1
from
    datamart
;

select
    column1
from
    datamart
limit
    10
;
Extract columns without duplication
#python
df.drop_duplicates()
--sql
select unique
    *
from
    datamart
;
Add column
#python
df_assign\
    = df.copy()\
        .assign(column = 10)

df['column'] = 10
--sql
select
    10 as column1
from
    dataframe
;
Extract records that match the conditions
#python
df_query_loc\
    = df.copy()\
        .query('column1 == 10')\
        .loc[:,['column1','column2']]

df[df.column1 == 10, ['column1','column2']]
--sql
select
    column1
from
    datamart1
where
    column1 == 10
;

select
    column1
    , count(column1)
from
    datamart1
group by
    column1
having
    count(column1) == 10
;
Change column name
#python
df.rename(columns = {'column1':'new_column1','column2':'new_column2'})

pd.DataFrame(df.values, columns = ['new_column1','new_column2'])

df.columns = ['new_column1','new_column2']

df.columns = 'new_' + df.columns
--sql
select
    column1 as new_column1
from
    datamart1
;
Combine df
#python
pd.merge(
    df1,
    df2,
    on = ['column1','column2'],
    how = 'left')

pd.merge(
    df1,
    df2,
    left_on  = ['column1','column2'],
    right_on = ['column3','column4'],
    how = 'left')

df1.merge(
    df2,
    on = 'column1',
    how = 'left')
--sql
select
    datamart1.column1
    , datamart1.column2
    , datamart2.column3
from
    datamart1
        left join
    datamart2
        on = column2
Concat that combines multiple data into one data
#python_pandas
#Vertical
pd.concat([df1,df2],axis=0)

df1.append(df2)

#side
pd.concat([df1,df2],axis=1)
#python_numpy
#Vertical
np.concatenate([nparray1,nparray2], axis=0)

#side
np.concatenate([nparray1,nparray2], axis=1)
--sql
select
    column1
from
    datamart1
union -- union all / intersect / except
select
    column2
from
    datamart2
Row count
#python
len(df)
--sql
select
    count(*)
from
    datamart1
;
Check the nature of the data
#python
#Shape check
np.array().shape
Sorting
#python
df.sort_values()
--sql
select
    *
from
    datamart1
order by
    column1
Processing window functions
#python

--sql

Generate data
#python
#Generate nparray with all the same values
#Example, 3D
np.tile(Numbers, (n, n, n))

List comprehension
[i for i in range(100)]

#When using an if statement
[i for i in range(100) if i > 50]
Aggregation
#python
df\
    .groupby(['column1','column2'])\
    .agg({'column3':'sum',
          'column4':'mean'})\
    .reset_index()
-- sql
select
    column1
    , column2
    , sum(column3)
    , avg(column4)
from
    datamart1
group by
    column1
    , column2
;
Count only specific elements when aggregating
df_tmp\
    .groupby('column1')\
    .agg({'column2':'nunique'})
Perform multiple operations on one column during aggregation
df_tmp\
    .groupby('column1')\
    .agg({'column2':['sum', 'min', 'max']})
Do the calculation
#python
#average
np.average(nparray, axis = n)
#For data of two or more dimensions, the result depends on the average dimension

np.array().mean()

#division
np.array(nparray1, nparray2)
Split a string with a specific symbol
#Get in list format
str_data.split('Specific string')
Extract the last n characters of a string
str_data[-n]
Delete the last n characters of the string
str_data[:-n]
Check if the text data contains a specific character string
str_data.find('The string to look for')
#If the string is included"1"If not included"-1"

Image data processing

Loading image data
#python
import cv2
cv2.imread('filename.png')
#python
import PIL
PIL.image.open('filename.png')
Display of image data
#python
cv2.imshow("image_name", cv2.imread('filename.png'))
#"image_name"Is optionally set
#python
PIL.image.open('filename.png').show()
Image data conversion
#python
#Image data np conversion
image = PIL.image.open('filename.png')
np.array(image)
Extracting part of the image
#python
np.array(image)[Top pixel:Lower pixel,Left pixel:Right pixel]
Add to image
#python

#Add a square
plt.figure()
plt.grid(false)
cv2.rectangle(np.array(image), (Right pixel,Top pixel), (Left pixel,Lower pixel), (255, 255, 255), 4)
plt.imshow(np.array(image),cmap='gray',vmin=0,vmax=65555)
Image conversion

signal.convolve2D

Support for TIF format

#Change the number of images to extract with the last number
Image.open('~.tif').seek(0)

Visualization process

Image size to display
#python
plt.figure(figsize = (6,4))
Specifying the range of the graph to be displayed
#python
plt.x_lim([right,left])
plt.y_lim([Up,under])
Make the image to be displayed square
#python
aspect = (ax.get_x_lim()[1] - ax.get_xlim()[1]) / (ax.get_y_lim()[1] - ax.get_y_lim()[0])
ax.set_aspect(aspect)
Make the background transparent
#python
fig.patch.set_alpha(0)
#Only fig is supported
Correspondence of grid lines
#python

#Do not display grid lines

Display of multiple images
#python

#Use 1, subplot,
plt.subplot(Vertical,side,1)
plt.plot(~)
plt.subplot(Vertical,side,2)
plt.plot(~)
↓
plt.subplot(Vertical,side,Vertical×side)
plt.show(~)

#2、add_Use subplot,
ax1 = plt.figure().add_subplot(Vertical,side,1)
ax1 = plot(~)
↓
plt.show()

#3, use subplots,
fig, axes = plt.subplots(~, figsize=(6,4)) #fig, is not used
axes[1].plot


Display Japanese
#python
#Process using the library

modeling

Method correspondence table_coding general

Processing of time data

Calculation of processing time
#python
import time
#Beginning of processing
time_start = time.time()
#End of processing
time_end   = time.time()
print(time_end-time_start)
Convert date and time data from character type to time type
import datetime
from dateutil.relativedelta import relativedelta

date_datetime = datetime.datetime.strptime('2020-05-24', '%Y-%m-%d')
Convert date and time data from character type to time type
import datetime
from dateutil.relativedelta import relativedelta

datetime.datetime.strftime(date_datetime, '%Y-%m-%d')
Difference of date and time data
date_datetime + datetime.timedelta(days=7)
date_datetime + relativedelta(weeks = 1)
Get day of the week information from date and time data
datetime.weekday(date_datetime)

Path processing

Loading a function from a py file
#python
#directory"src"Read the function from the following file
import sys
sys.path.append('src/')
from filename.py import func_name
Read data on Windows server
#python
import pathlib
filepath = r'path_file name.csv'
read_file = pathlib.WindowPath()
Extract files in a specific directory
import glob
glob.glob('path/*')
Create a path if there is no path
import os

if os.path.exists(path_name) == False
    os.makedirs(path_name)

Load and save files

Read file
#python_pandas

#Read df
pd.DataFrame(data,columns=[column1,column2])

pd.read_csv('filename')
#python_numpy

#Loading npz
np.load(filename)
Save file
#python_pandas

#Read df
pd.DataFrame().to_csv('filename', index=False)
#python_numpy

#Save npz
np.savez(pathname, filename1 = np.array('1'), filename2 = np.array('2'))
#python_matplotlib
#Save image
plt.savefig('filename')
#python_sklearn
#Save model
import sklearn
import pickle
reg = sklearn.LinearRegression().fit(x,y)
with open('filename.pickle', mode = 'wb', as f
    pickle.dump(reg, f)

Iteration processing

Multi-element iteration
#zip
for i_column1, i_column2 in zip(df_tmp.column1, df_tmp.column2):

#itertools
for i,j in 
    permutations(i,j):permutation
    combinations(i,j):combination
    products(i,j):Cartesian product
Enter the iteration number

for i in enumerate(df_tmp.column):

About functions / classes

function
#def, use
def_func_name(parameter1, parameter2):
processing
return Return value

#Use lambda,
lambda parameter:Return value formula(parameter)

#Use lambda,_Expressed in one line
(lambda parameter:Return value formula(parameter))(parameter_instance)
class

Recommended Posts

Code convention, method correspondence table
Nintendo Code Puzzle Table 1st question
Anaconda and Python version correspondence table
IPython and pry command correspondence table