main.py
a=""" """
#Paste the numbers copied from Excel etc. between the triple quotes above, one number per line (the script splits this text on newlines).
import collections
import matplotlib.pyplot as plt
# Expected share (in percent) of each leading digit according to Benford's law.
ben = {1: 30.1, 2: 17.6, 3: 12.5, 4: 9.7, 5: 7.9, 6: 6.7, 7: 5.8, 8: 5.1, 9: 4.6}


def leading_digit(text):
    """Return the first significant digit of *text* as one of "1".."9".

    Signs, whitespace, thousands/decimal separators and leading zeros are
    skipped, so "0.052" gives "5" and "-7,100" gives "7". Returns None when
    the text does not start with a number or contains no non-zero digit
    (for example "", "0" or "n/a").
    """
    for ch in str(text):
        if ch.isdecimal():
            # int() also understands non-ASCII decimal digits (e.g. full-width
            # ones), so they are merged with their ASCII counterparts.
            value = int(ch)
            if value:
                return str(value)
            continue  # a zero is never the first significant digit
        if ch in "+-.," or ch.isspace():
            continue
        return None  # anything else means this line is not a plain number
    return None


def count_leading_digits(lines):
    """Count the leading digits of *lines*.

    Returns a list of (digit, count) pairs, most frequent first. Lines without
    a leading digit are ignored, so the result has at most nine entries and
    never contains "0".
    """
    digits = (leading_digit(line) for line in lines)
    return collections.Counter(d for d in digits if d is not None).most_common()


def to_percentages(counts):
    """Convert a sequence of counts into percentages of their total."""
    total = sum(counts)  # computed once instead of once per element
    return [count * 100 / total for count in counts]


if __name__ == "__main__":
    # Drop empty lines from the pasted data.
    a = [x for x in a.split("\n") if x != ""]
    l_head = [d for d in map(leading_digit, a) if d is not None]
    l_count = collections.Counter(l_head).most_common()
    # Bars are drawn in order of frequency by default. Uncomment the next
    # line to draw them in digit order 1, 2, ..., 9 instead.
    #l_count.sort(key=lambda x: x[0])
    if not l_count:
        raise SystemExit("No leading digits found: paste one number per line into `a` before running.")

    l_index, l_total = zip(*l_count)
    l_rate = to_percentages(l_total)
    ben_label = [ben[int(x)] for x in l_index]

    # One group of two bars per digit that actually occurs in the data, so the
    # plot also works when fewer than nine distinct digits are present.
    positions = range(len(l_index))
    plt.xticks([p + 0.15 for p in positions], l_index)
    plt.bar(positions, l_rate, color='g', width=0.3, label='Selected_data', align="center")
    plt.bar([p + 0.3 for p in positions], ben_label, color='b', width=0.3, label='Benford_law', align="center")
    plt.legend(bbox_to_anchor=(0.5, 1), loc=2)
    plt.show()
Example: Population by administrative unit in Japan
Area for each administrative unit in Japan
Population density by administrative unit in Japan
Number of households per administrative unit in Japan
All of the numbers above combined
The statistics are taken from the figures published by the Statistics Bureau of Japan: https://www.stat.go.jp/data/index.html
Recommended Posts