[PYTHON] Implement "Data Visualization Design # 3" with pandas and matplotlib

What is a data visualization design?

This is a collection of data visualization tips published on note by Go Ando of THE GUILD, who is well known for services that focus on UX and UI.


What about # 1 and # 2?


plt.rcParams['font.family'] = 'Hiragino Sans'  

The line above is the font-setting part of the code that follows.

13. It is effective to put a graph in the table


Use pandas.

import pandas as pd
import numpy as np

# IPython/Jupyter magic (not plain Python): render matplotlib output inline
%matplotlib inline
# Data: earnings per product; the column name gives the unit as M dollars.
# NOTE(review): this literal is never closed in this excerpt -- the dict and the
# pd.DataFrame( call have no closing "})". The 'unit' column that format_dict
# and the second .bar() call below refer to is not defined here either.
# Presumably those lines were lost; restore them before running.
apple_products = pd.DataFrame({"product":["iPhone","iPad","Mac","Services","Other"],
                              "Earnings(M dollars)":[141319,19222,25859,29980,12863],

# Number format per column: thousands separators, no decimals
format_dict = {'Earnings(M dollars)':'{0:,.0f}', 'unit':'{0:,.0f}'}

# Display the table with an in-cell bar for each numeric column.
# NOTE(review): the two lines below are the tail of a parenthesised method
# chain; the opening "(" and the object the .bar() calls are chained on are
# missing -- presumably apple_products.style.format(format_dict); confirm
# against the original article.
 .bar(color="#99ceff", vmin=0, subset=['Earnings(M dollars)'], align='zero')
 .bar(color="#ff999b", vmin=0, subset=['unit'], align='zero'))

[Image: screenshot 2019-12-03 21.55.48.png]

15. Stacked graphs are useful for comparing percentages


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

#For error avoidance in pandas
from pandas.plotting import register_matplotlib_converters

# Font configuration: family and weight applied to all text in the figure
plt.rcParams.update({
    'font.family': 'Hiragino Sans',
    'font.weight': 'heavy',
})

# Share of each item (columns) for the two bars to compare (rows); every row
# sums to 1.
# NOTE(review): the row labels (an index=... argument) appear to have been lost
# from this excerpt, which left the pd.DataFrame( call unterminated. The call
# is closed here with the default integer index (0, 1); restore the real row
# labels if they are known.
music_env_df = pd.DataFrame({
    "radio": [0.4, 0.21],
    "CD / Download": [0.22, 0.44],
    "Video distribution": [0.2, 0.3],
    "Music distribution": [0.18, 0.05],
})

# Bar colours: one per column, in column order
bar_colors = ["#3B7780", "#98C550", "#7FC2CB", "#E9C645"]

# x positions (and tick labels) of the bars: one per row
x = music_env_df.index
# Item names shown in the graph: one per column
keys = music_env_df.columns

fig, ax = plt.subplots(figsize=(7, 7))

# 1. Hide the left and right frame lines
sides = ['left', 'right']
for side in sides:
    ax.spines[side].set_visible(False)

# 2. Remove the tick marks and tick labels of the left axis
ax.tick_params(left=False, labelleft=False)

# 3. Change the colour of the top and bottom frame lines
# NOTE(review): no statement for this step is present in this excerpt.

# 4. x-axis ticks: larger labels, ticks and labels in dim gray
ax.tick_params(
    axis='x',
    labelsize='x-large',
    color="dimgray",
    labelcolor="dimgray",
)

# 5. Plot the stacked bars and keep what each ax.bar() call returns for the
#    annotation step. Segment i sits on top of the running total of segments
#    0..i-1; the first segment starts at 0.
bar_info = []
segments = music_env_df.T          # one row per item
running_total = segments.cumsum()  # cumulative height after each item
for i in range(len(keys)):
    # Without this distinction the first item would be stacked on
    # running_total.iloc[-1] (the full total) and the others never drawn.
    bottom = running_total.iloc[i - 1] if i > 0 else 0
    bar_info.append(ax.bar(x, segments.iloc[i], bottom=bottom, width=0.5, color=bar_colors[i]))

# 6. Label every segment and connect matching segments of neighbouring bars.
#    bar_info holds one entry per item; iterating an entry yields its rectangles
#    (one per bar), in x order.
for key, one in zip(keys, bar_info):
    bars = list(one)
    for j, one_bar in enumerate(bars):
        left, bottom = one_bar.xy
        center_x = left + one_bar.get_width() / 2
        center_y = bottom + one_bar.get_height() / 2
        # Percentage value (%): the segment height formatted as a percentage
        label = f'{one_bar.get_height():.0%}'
        # Item name: written once, on the first bar only
        if j == 0:
            label = f'{key}\n{label}'
        ax.text(center_x, center_y, label, ha='center', va='center')
    # Highlight line: from the bottom-right corner of one rectangle to the
    # bottom-left corner of the next. It is drawn only once both end points are
    # known; ax.arrow takes a start point plus an offset (dx, dy).
    for prev_bar, next_bar in zip(bars, bars[1:]):
        start_x = prev_bar.xy[0] + prev_bar.get_width()
        start_y = prev_bar.xy[1]
        ax.arrow(start_x, start_y,
                 next_bar.xy[0] - start_x, next_bar.xy[1] - start_y,
                 head_width=0, head_length=0, ec='dimgray')
# 7. Set the vertical axis range
# NOTE(review): no statement for this step is present in this excerpt.


Extra edition # 1


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

#For error avoidance in pandas
from pandas.plotting import register_matplotlib_converters

# Font configuration: family and weight applied to all text in the figure
plt.rcParams.update({
    'font.family': 'Hiragino Sans',
    'font.weight': 'heavy',
})

# Cooling speed per method, in display order (single column 'Cooling speed')
icing_method = pd.Series(
    {
        'Ice bath 2 ℃': 0.35,
        'Ice bath 8 ℃': 0.19,
        'Watering+Ice massage': 0.13,
        'Watering': 0.05,
        'Fan': 0.03,
        'Vein icing': 0.02,
    },
    name='Cooling speed',
).to_frame()

# Supplementary description for each method, in the same order as the index
icing_detail = [
    "Immerse the whole body in an ice bath at 2 ℃",
    "Immerse the whole body in an ice bath at 8 ℃",
    "12 ℃ watering+Ice massage",
    "Continue to apply tap water at 15 ℃ to the whole body",
    "It hits the wind of an electric fan at room temperature of 22 ° C.",
    "(Neck / Axilla / Inguinal)",
]

# The bar colour is the only colour taken from the original design
ori_blue = "#71C0F9"

fig, ax = plt.subplots(figsize=(12, 6))

# Horizontal bars: bar i is drawn at y = i, in index order
icing_method.plot.barh(legend=False, ax=ax, width=0.8, color=ori_blue)

# 1. Title
ax.set_title("Cooling method and cooling speed", fontsize=24, fontweight='bold', color="dimgray")

# 2. Leave a wide left margin: the labels of step 10 are drawn left of x=0,
#    i.e. outside the axes, and would be cut off by the default margin.
#    Widen further if long labels are still clipped.
fig.subplots_adjust(left=0.5)

# 3. Reverse the y-axis so the first method is at the top. This also puts the
#    main label (y = i-0.1) above its description (y = i+0.25) in step 10.
ax.invert_yaxis()

# 4. Hide every frame line except the left one
sides = ['right', 'top', 'bottom']
for side in sides:
    ax.spines[side].set_visible(False)

# 5. Remove the x/y tick marks and the y tick labels
ax.tick_params(bottom=False, left=False, labelleft=False)

# 6. x-axis tick positions (0.0 to 0.4 in steps of 0.1) and label colour
ax.set_xticks([i*0.1 for i in range(5)])
ax.tick_params(axis='x', labelcolor="silver")

# 7. x-axis range: end slightly past 0.4, because with a limit of exactly 0.4
#    the grid line at x=0.4 does not show up
ax.set_xlim(0, 0.41)

# 8. Vertical grid lines at the x ticks
ax.grid(axis='x')

# 9. x-axis label
ax.set_xlabel("Body temperature drops per 10 seconds (℃)", fontsize="x-large", fontweight="bold", color="silver")

# 10. Value to the right of each bar; method name and supplementary
#     description to the left of the axis
vmax = icing_method['Cooling speed'].max()
for i, (value, main_label, sub_label) in enumerate(zip(icing_method['Cooling speed'], icing_method.index, icing_detail)):
    ax.text(value+vmax*0.02, i, f'{value:,} ℃', fontsize='x-large', va='center', color=ori_blue)
    ax.text(-0.01, i-0.1, main_label, fontsize='xx-large', va='center', ha='right', color="dimgray")
    ax.text(-0.01, i+0.25, sub_label, fontsize='x-large', va='center', ha='right', color="silver")


