24th
Seaborn
import seaborn as sns
%matplotlib inline
import pandas as pd
# Load train.csv and keep only the rows that have an Age value.
df = pd.read_csv('train.csv').dropna(subset=['Age'])
# Plot the distribution of Age.
# NOTE(review): distplot is deprecated in seaborn >= 0.11; consider
# histplot/displot when upgrading.
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e3d0>
data:image/s3,"s3://crabby-images/f3565/f35655d259ae16acbac3dc8387d2b318e791ef04" alt="png"
# Apply seaborn's default theme, then redraw the Age distribution with 50 bins.
sns.set()
sns.distplot(df['Age'],bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd05850>
data:image/s3,"s3://crabby-images/a2888/a28880afc3da7d9adb123c9479851c0d41d457ff" alt="png"
See the distribution of two variables with sns.jointplot()
# Joint plot of Age (x) against Fare (y).
sns.jointplot(x='Age', y='Fare', data=df)
<seaborn.axisgrid.JointGrid at 0x7fcbcdbe31d0>
data:image/s3,"s3://crabby-images/3e1f5/3e1f5efb74dd26aca94866f1760bc892b003ca24" alt="png"
# Same Age/Fare joint plot, drawn with kind='hex'.
sns.jointplot(x='Age', y='Fare', data=df, kind='hex')
<seaborn.axisgrid.JointGrid at 0x7fcbcdb2b890>
data:image/s3,"s3://crabby-images/51a39/51a39084ec7191772cc77d75fb94241226ccf625" alt="png"
(Important) Display the distributions of multiple columns at once with sns.pairplot()
# Pairwise plots of the selected columns, colored by Survived; alpha=0.5 is
# passed through plot_kws to the scatter plots.
# NOTE(review): the RuntimeWarnings in the output below come from statsmodels'
# KDE code; presumably a near-discrete column yields a zero bandwidth on the
# diagonal plots. diag_kind='hist' may avoid it -- confirm.
sns.pairplot(df[['Age', 'Fare', 'Pclass', 'Survived']], hue='Survived', kind='scatter', plot_kws={'alpha': 0.5})
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kde.py:487: RuntimeWarning: invalid value encountered in true_divide
binned = fast_linbin(X, a, b, gridsize) / (delta * nobs)
/opt/anaconda3/lib/python3.7/site-packages/statsmodels/nonparametric/kdetools.py:34: RuntimeWarning: invalid value encountered in double_scalars
FAC1 = 2*(np.pi*bw/RANGE)**2
<seaborn.axisgrid.PairGrid at 0x7fcbcda34990>
data:image/s3,"s3://crabby-images/1693a/1693a7310b0505d11cf330515dd619bde6698844" alt="png"
25th
sns.barplot(): make a bar chart
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
# Load train.csv and preview its first rows.
df = pd.read_csv('train.csv')
df.head()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
sns.barplot(x='Survived', y='Age', data=df)  # bar height is the average Age per Survived group
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcdd7e390>
data:image/s3,"s3://crabby-images/8cfbf/8cfbfb8ead357ebc4bbf79b10159d98a89078dda" alt="png"
sns.barplot(x='Survived', y='Age', data=df, estimator=np.median)  # bar height is the median Age instead
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcce3a190>
data:image/s3,"s3://crabby-images/c71ba/c71bac9ae6252dc8534920455e8516ea7cac408f" alt="png"
sns.countplot(): compare the number of records per category
# Count rows per Sex, split by Survived.
sns.countplot(x='Sex', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccfb0c10>
data:image/s3,"s3://crabby-images/22817/228170ff8736dfc450865443b3b05db275e13ec8" alt="png"
sns.boxplot(): compare values by category
# Box plot of Age for each Pclass.
sns.boxplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbccd82750>
data:image/s3,"s3://crabby-images/be63f/be63ff0849ba591bfd902f3f0e8960b41b94a510" alt="png"
# Same box plot, split by Survived within each Pclass.
sns.boxplot(x='Pclass', y='Age', data=df, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc2a4ed0>
data:image/s3,"s3://crabby-images/53f40/53f404b2d8c842264b62b252dabeebfa6286e13d" alt="png"
sns.violinplot(): visualize the distribution of the data
# Violin plot of Age for each Pclass.
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc1e3ad0>
data:image/s3,"s3://crabby-images/7661f/7661f81b775a52177522474562af32876bb611ec" alt="png"
# Violin plot of Age for each Pclass, split by Survived.
sns.violinplot(data=df, x='Pclass', y='Age', hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc107210>
data:image/s3,"s3://crabby-images/2121f/2121f295c8037e6c230ea8cf89d78b6192da4015" alt="png"
sns.swarmplot(): see the actual distribution of the data points
# Swarm plot of Age for each Pclass.
sns.swarmplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcc19a710>
data:image/s3,"s3://crabby-images/5b876/5b8765a0f2412320c8cf7867b0b9211be102b8dc" alt="png"
# Same swarm plot with marker size 4, colored by Survived.
sns.swarmplot(x='Pclass', y='Age', data=df, size=4, hue='Survived')
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6e9fe50>
data:image/s3,"s3://crabby-images/d3874/d38748224b5231b97f3a0f4a823c2224da9f75f0" alt="png"
26th
Draw a Heatmap
Compute the correlation matrix with df.corr()
import pandas as pd
df = pd.read_csv('train.csv')
# Correlate the numeric columns only. train.csv also holds text columns
# (Name, Sex, Ticket, Cabin, Embarked); pandas >= 2.0 raises on them in
# DataFrame.corr() instead of silently dropping them as older versions did.
corr = df.select_dtypes(include='number').corr()
corr
|
PassengerId |
Survived |
Pclass |
Age |
SibSp |
Parch |
Fare |
PassengerId |
1.000000 |
-0.005007 |
-0.035144 |
0.036847 |
-0.057527 |
-0.001652 |
0.012658 |
Survived |
-0.005007 |
1.000000 |
-0.338481 |
-0.077221 |
-0.035322 |
0.081629 |
0.257307 |
Pclass |
-0.035144 |
-0.338481 |
1.000000 |
-0.369226 |
0.083081 |
0.018443 |
-0.549500 |
Age |
0.036847 |
-0.077221 |
-0.369226 |
1.000000 |
-0.308247 |
-0.189119 |
0.096067 |
SibSp |
-0.057527 |
-0.035322 |
0.083081 |
-0.308247 |
1.000000 |
0.414838 |
0.159651 |
Parch |
-0.001652 |
0.081629 |
0.018443 |
-0.189119 |
0.414838 |
1.000000 |
0.216225 |
Fare |
0.012658 |
0.257307 |
-0.549500 |
0.096067 |
0.159651 |
0.216225 |
1.000000 |
Plot the heatmap with sns.heatmap()
# Heatmap of the correlation matrix with the default settings.
sns.heatmap(corr)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc6df9850>
data:image/s3,"s3://crabby-images/61965/61965cb74371b16460dc8b47afe2a5b058a49a7b" alt="png"
# Same heatmap with the 'coolwarm' colormap; annot=True writes each value in its cell.
sns.heatmap(corr, cmap='coolwarm', annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbcd0b3290>
data:image/s3,"s3://crabby-images/67428/674285056552aeba04b3249f0de7940b098a0df8" alt="png"
sns.heatmap() helps you get a bird's-eye view of your data
# Load seaborn's 'flights' example dataset, then show its row count and first rows.
flights = sns.load_dataset('flights')
print(len(flights))
flights.head()
144
|
year |
month |
passengers |
0 |
1949 |
January |
112 |
1 |
1949 |
February |
118 |
2 |
1949 |
March |
132 |
3 |
1949 |
April |
129 |
4 |
1949 |
May |
121 |
# Build a pivot table: one row per month, one column per year, passenger counts as values.
flights_pivot = flights.pivot_table(index='month', columns='year', values='passengers')
flights_pivot
year |
1949 |
1950 |
1951 |
1952 |
1953 |
1954 |
1955 |
1956 |
1957 |
1958 |
1959 |
1960 |
month |
|
|
|
|
|
|
|
|
|
|
|
|
January |
112 |
115 |
145 |
171 |
196 |
204 |
242 |
284 |
315 |
340 |
360 |
417 |
February |
118 |
126 |
150 |
180 |
196 |
188 |
233 |
277 |
301 |
318 |
342 |
391 |
March |
132 |
141 |
178 |
193 |
236 |
235 |
267 |
317 |
356 |
362 |
406 |
419 |
April |
129 |
135 |
163 |
181 |
235 |
227 |
269 |
313 |
348 |
348 |
396 |
461 |
May |
121 |
125 |
172 |
183 |
229 |
234 |
270 |
318 |
355 |
363 |
420 |
472 |
June |
135 |
149 |
178 |
218 |
243 |
264 |
315 |
374 |
422 |
435 |
472 |
535 |
July |
148 |
170 |
199 |
230 |
264 |
302 |
364 |
413 |
465 |
491 |
548 |
622 |
August |
148 |
170 |
199 |
242 |
272 |
293 |
347 |
405 |
467 |
505 |
559 |
606 |
September |
136 |
158 |
184 |
209 |
237 |
259 |
312 |
355 |
404 |
404 |
463 |
508 |
October |
119 |
133 |
162 |
191 |
211 |
229 |
274 |
306 |
347 |
359 |
407 |
461 |
November |
104 |
114 |
146 |
172 |
180 |
203 |
237 |
271 |
305 |
310 |
362 |
390 |
December |
118 |
140 |
166 |
194 |
201 |
229 |
278 |
306 |
336 |
337 |
405 |
432 |
# Heatmap of passengers by month (rows) and year (columns).
sns.heatmap(flights_pivot)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5baabd0>
data:image/s3,"s3://crabby-images/ba1e1/ba1e10962789e53b6a97252aa3a902a9c7d34cd4" alt="png"
27th
Change the basic style with sns.set()
Specify the intended use of the figure with the context argument
import pandas as pd
import seaborn as sns
%matplotlib inline
# Switch seaborn to the 'poster' context before plotting.
sns.set(context='poster')
# Load train.csv and keep only the rows that have an Age value.
df = pd.read_csv('train.csv').dropna(subset=['Age'])
# NOTE(review): distplot is deprecated in seaborn >= 0.11; consider
# histplot/displot when upgrading.
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc568f690>
data:image/s3,"s3://crabby-images/edc07/edc07b2607825cc54caf8a458cdfcd5b02c76623" alt="png"
Specify the style of the entire graph with the style argument
sns.set_style(style='whitegrid')  # change the background style to 'whitegrid'
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc563d850>
data:image/s3,"s3://crabby-images/79a87/79a878d342824bd8bdfbc85489f6091da72f36a9" alt="png"
Specify color with palette argument
# Use the 'bright' color palette.
# NOTE(review): sns.set() also applies its defaults for the arguments not
# given here (context, style), so the earlier 'poster'/'whitegrid' settings
# presumably no longer apply -- confirm against the seaborn version in use.
sns.set(palette='bright')
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc5472850>
data:image/s3,"s3://crabby-images/3f4b3/3f4b3478b00b02731c5bfb1060ef56578dadaf4e" alt="png"
Remove axis spines and frames with sns.despine()
sns.set(palette='bright' ,style='ticks')  # style='ticks' plots with tick marks on the axes
sns.violinplot(x='Pclass', y='Age', data=df)
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc53ed810>
data:image/s3,"s3://crabby-images/16045/1604598404f8ac8e4752c7d74012129a8ba23ebd" alt="png"
# Draw the violin plot, then strip axis spines from the current figure
# (despine() removes the top and right spines by default).
sns.set(palette='bright' )
sns.violinplot(x='Pclass', y='Age', data=df)
sns.despine()
data:image/s3,"s3://crabby-images/4b955/4b9552e4dafdeac05f422e123e858f5f8a29087d" alt="png"
Like matplotlib, you can do many things with the plt module
import matplotlib.pyplot as plt
# Create a 10x5 figure first so the seaborn plot is drawn into it.
plt.figure(figsize=(10, 5))
sns.distplot(df['Age'])
<matplotlib.axes._subplots.AxesSubplot at 0x7fcbc52d8750>
data:image/s3,"s3://crabby-images/2a403/2a4038b6caccc52ddab1c72f2051776ec542bdde" alt="png"
# Draw the plot and save the current figure to seaborn_sample.png.
sns.distplot(df['Age'])
plt.savefig('seaborn_sample.png')
data:image/s3,"s3://crabby-images/03da6/03da640365cc4fc919d99a76def8e9976ed6901e" alt="png"