scikit-learn works with NumPy arrays, so data processed with pandas has to be converted to a NumPy array afterwards. Standard Python, NumPy, and pandas are also easy to confuse because many of their functions overlap, so this article first summarizes the very basics of each.
python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#import
import numpy as np
import scipy as py
import pandas as pd
import itertools as it
'''
Create
'''
#List creation
list_value = [10,11,12]
list_value
Out[374]: [10, 11, 12]
#Tuple creation
tuple_value = (10,11,12)
tuple_value
Out[375]: (10, 11, 12)
#Dictionary creation
dict_value = {0:10,1:11,2:12}
dict_value
Out[376]: {0: 10, 1: 11, 2: 12}
#Set creation
set_value = {10,11,12}
set_value
Out[377]: {10, 11, 12}
#numpy array creation
ndarray_value = np.array([10,11,12], dtype=np.int32)
ndarray_value
Out[378]: array([10, 11, 12], dtype=int32)
#pandas series creation
series_value = pd.Series({0:10,1:11,2:12})
series_value
Out[379]:
0 10
1 11
2 12
dtype: int64
#pandas data frame creation
dataframe_value = pd.DataFrame({'seq':[10,11,12]})
dataframe_value
Out[380]:
seq
0 10
1 11
2 12
'''
Type (class)
'''
#Integer type (class)
type(1)
Out[382]: int
#Floating-point type (class)
type(0.1)
Out[383]: float
#String type (class)
type('abc')
Out[384]: str
#List type (class)
type(list_value)
Out[385]: list
#Tuple type (class)
type(tuple_value)
Out[386]: tuple
#Dictionary type (class)
type(dict_value)
Out[387]: dict
#Set type (class)
type(set_value)
Out[388]: set
#numpy array type (class)
type(ndarray_value)
Out[389]: numpy.ndarray
#pandas series type (class)
type(series_value)
Out[390]: pandas.core.series.Series
#pandas dataframe type (class)
type(dataframe_value)
Out[391]: pandas.core.frame.DataFrame
#numpy data type
ndarray_value.dtype
Out[392]: dtype('int32')
#Data type of pandas series
series_value.dtype
Out[393]: dtype('int64')
#Data type of pandas data frame * dataframe_value.dtype raises an error; use dataframe_value.dtypes to get the per-column types
'''
Type conversion
'''
#Convert from string type to integer type
int('1')
Out[308]: 1
#Convert from string type to floating-point type
float('1')
Out[309]: 1.0
#Convert from integer type to string type
str(1)
Out[310]: '1'
###############
#Convert to list#
###############
#From tuple
list(tuple_value)
Out[311]: [10, 11, 12]
#From dictionary * Not converted well: only the keys are returned, not the values
list(dict_value)
Out[312]: [0, 1, 2]
#From the set
list(set_value)
Out[313]: [10, 11, 12]
#From numpy array
list(ndarray_value)
Out[314]: [10, 11, 12]
#From pandas series
list(series_value)
Out[315]: [10, 11, 12]
#From pandas dataframe
list(dataframe_value.values.flatten())
Out[318]: [10, 11, 12]
#From pandas data frame * Not converted well: only the column names are returned
list(dataframe_value)
Out[316]: ['seq']
#From pandas data frame * Not converted well: each row becomes a separate one-element array
list(dataframe_value.values)
Out[317]: [array([10]), array([11]), array([12])]
###############
#Conversion to tuples#
###############
#Conversion to tuple is the same as list
######################
#Conversion to dictionary#
######################
#From pandas series * Cannot convert from types other than pandas series to dictionary
dict(series_value)
Out[327]: {0: 10, 1: 11, 2: 12}
###############
#Conversion to set#
###############
#Conversion to set is the same as list
###################
#Conversion to numpy array#
###################
#From the list
np.array(list_value)
Out[354]: array([10, 11, 12])
#From tuple
np.array(tuple_value)
Out[355]: array([10, 11, 12])
#From dictionary * Not converted well: the whole dict is wrapped in an object-dtype array
np.array(dict_value)
Out[356]: array({0: 10, 1: 11, 2: 12}, dtype=object)
#From set * Not converted well: the whole set is wrapped in an object-dtype array
np.array(set_value)
Out[357]: array({10, 11, 12}, dtype=object)
#From the pandas series
np.array(series_value)
Out[358]: array([10, 11, 12])
#From pandas dataframe
In [231]: np.array(dataframe_value.values.flatten())
Out[231]: array([10, 11, 12])
#From pandas data frame * Not converted well: the result is a 2-D array of shape (rows, columns), not a flat 1-D array
np.array(dataframe_value)
Out[359]:
array([[10],
[11],
[12]])
#From pandas data frame * Not converted well: .values is likewise a 2-D array of shape (rows, columns), not a flat 1-D array
np.array(dataframe_value.values)
Out[395]:
array([[10],
[11],
[12]])
#######################
#Conversion to pandas series#
#######################
#From the list
In [232]: pd.Series(list_value)
Out[232]:
0 10
1 11
2 12
dtype: int64
#From tuple
In [233]: pd.Series(tuple_value)
Out[233]:
0 10
1 11
2 12
dtype: int64
#From the dictionary
In [234]: pd.Series(dict_value)
Out[234]:
0 10
1 11
2 12
dtype: int64
#From set * An error occurs
#From numpy array
In [236]: pd.Series(ndarray_value)
Out[236]:
0 10
1 11
2 12
dtype: int32
#From pandas dataframe
In [239]: pd.Series(dataframe_value.values.flatten())
Out[239]:
0 10
1 11
2 12
dtype: int64
#From pandas data frame * Not converted well
In [237]: pd.Series(dataframe_value)
Out[237]:
0 (s, e, q)
1 (s, e, q)
2 (s, e, q)
dtype: object
#* Error from pandas data frame
############################
#Convert to pandas dataframe#
############################
#From the list
In [240]: pd.DataFrame(list_value)
Out[240]:
0
0 10
1 11
2 12
#From tuple * An error occurs
#From dictionary * An error occurs
#From set * An error occurs
#From numpy array
In [244]: pd.DataFrame(ndarray_value)
Out[244]:
0
0 10
1 11
2 12
#From the pandas series
In [245]: pd.DataFrame(series_value)
Out[245]:
0
0 10
1 11
2 12
##########################
#Data type conversion of numpy array#
##########################
#Convert from integer to string
In [246]: ndarray_value.astype(np.string_)
Out[246]:
array([b'10', b'11', b'12'],
dtype='|S11')
##############################
#Pandas series data type conversion#
##############################
#Convert from integer to string
In [247]: series_value.astype(np.string_)
Out[247]:
0 b'10'
1 b'11'
2 b'12'
dtype: bytes168
In [248]: dataframe_value.astype(np.string_)
Out[248]:
seq
0 b'10'
1 b'11'
2 b'12'
'''
Number of elements in the sequence
'''
#list
In [252]: len(list_value)
Out[252]: 3
#Tuple
In [253]: len(tuple_value)
Out[253]: 3
#dictionary
In [254]: len(dict_value)
Out[254]: 3
#set
In [255]: len(set_value)
Out[255]: 3
#numpy array
In [256]: len(ndarray_value)
Out[256]: 3
#pandas series
In [257]: len(series_value)
Out[257]: 3
#pandas dataframe
In [258]: len(dataframe_value)
Out[258]: 3
########
#dimension#
########
#numpy array
In [259]: ndarray_value.ndim
Out[259]: 1
In [260]: ndarray_value.shape
Out[260]: (3,)
#pandas series
In [261]: series_value.ndim
Out[261]: 1
In [262]: series_value.shape
Out[262]: (3,)
#pandas dataframe
In [263]: dataframe_value.ndim
Out[263]: 2
In [264]: dataframe_value.shape
Out[264]: (3, 1)
'''
Element reference
'''
#list
In [266]: list_value[0]
Out[266]: 10
In [267]: list_value[-3]
Out[267]: 10
In [268]: list_value[2]
Out[268]: 12
In [269]: list_value[-1]
Out[269]: 12
#Tuples are the same as lists
#dictionary
In [274]: dict_value[0]
Out[274]: 10
In [275]: dict_value[-3] #* An error will occur
In [276]: dict_value[2]
Out[276]: 12
In [277]: dict_value[-1] #* An error will occur
#Set does not support index lookup
#numpy array
In [279]: ndarray_value[0]
Out[279]: 10
In [280]: ndarray_value[-3]
Out[280]: 10
In [281]: ndarray_value[2]
Out[281]: 12
In [282]: ndarray_value[-1]
Out[282]: 12
#pandas series
In [283]: series_value[0]
Out[283]: 10
In [284]: series_value[-3] #* An error will occur
In [285]: series_value[2]
Out[285]: 12
In [286]: series_value[-1] #* An error will occur
#pandas dataframe
In [287]: dataframe_value[0] #* An error will occur
'''
Sum
'''
#Sum with the Python built-in sum()
In [289]: sum(list_value)
Out[289]: 33
In [290]: sum(tuple_value)
Out[290]: 33
In [291]: sum(dict_value)
Out[291]: 3
In [292]: sum(set_value)
Out[292]: 33
In [293]: sum(ndarray_value)
Out[293]: 33
In [294]: sum(series_value)
Out[294]: 33
In [295]: sum(dataframe_value) #* An error will occur
#Sum with numpy
In [296]: np.sum(list_value)
Out[296]: 33
In [297]: np.sum(tuple_value)
Out[297]: 33
In [298]: np.sum(dict_value) #* Does not work: the dict is returned unchanged
Out[298]: {0: 10, 1: 11, 2: 12}
In [299]: np.sum(set_value) #* Does not work: the set is returned unchanged
Out[299]: {10, 11, 12}
In [300]: np.sum(ndarray_value)
Out[300]: 33
In [301]: np.sum(series_value)
Out[301]: 33
In [302]: np.sum(dataframe_value)
Out[302]:
seq 33
dtype: int64
'''
Permutations, combinations, and Cartesian product
'''
#permutation
seq = ('A','B','C')
len(list(it.permutations(seq,2)))
Out[15]: 6
list(it.permutations(seq, 2))
Out[16]: [('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]
#combination
len(list(it.combinations(seq,2)))
Out[18]: 3
list(it.combinations(seq, 2))
Out[19]: [('A', 'B'), ('A', 'C'), ('B', 'C')]
#Cartesian product
seq1 = ('A','B')
seq2 = ('C','D')
len(list(it.product(seq1,seq2)))
Out[23]: 4
list(it.product(seq1,seq2))
Out[24]: [('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D')]
Recommended Posts