# [PYTHON] Google Forms aggregate analysis tool

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

def quantify(csv_pass):
    """Load a survey CSV and encode every answer as an integer code.

    The 'Time stamp' column is dropped. For each remaining column, the
    distinct answers are numbered 0, 1, 2, ... in the order in which they
    first appear in that column. The number of responses (rows) is printed.

    Args:
        csv_pass: Path to the CSV file; it is passed straight to
            ``pandas.read_csv``.

    Returns:
        A tuple ``(hashs, replaced_value)``:

        * ``hashs`` maps each column name to a dict ``{answer: code}``.
          Every remaining column gets an entry, even when the CSV has a
          header but no rows (the entry is then an empty dict).
        * ``replaced_value`` is a DataFrame with the same columns in which
          each answer has been replaced by its integer code.

    Raises:
        KeyError: If the CSV has no 'Time stamp' column.
    """
    df = pd.read_csv(csv_pass)
    num_respose = len(df)
    print(num_respose)

    # Delete timestamp
    df = df.drop('Time stamp', axis=1)

    hashs = {}
    replaced_value = df.copy()
    for column in df.columns:
        # unique() returns the distinct answers in order of first appearance,
        # so enumerate() hands out the codes in that same order.
        codes = {
            answer: code for code, answer in enumerate(df[column].unique())
        }
        hashs[column] = codes
        # Every value in the column is a key of `codes`, so map() encodes the
        # whole column in one pass.
        replaced_value[column] = df[column].map(codes)

    return hashs, replaced_value

# Run the quantification step on the survey CSV. The results are kept as
# module-level names because pie() and cross_tabulation() look up `hashs`
# as a global.
hashs, replaced_value = quantify("./G1 Survey Form for Students.csv")

#Pie chart creation algorithm
def pie(replaced_value):
    """Draw one pie chart per question showing how often each answer occurs.

    For every column except "Time stamp", the rows are grouped by answer
    code and counted; the column name, the slice labels and the counts are
    printed, and a pie chart is shown.

    Args:
        replaced_value: Data frame whose answers have been replaced by
            numerical codes.

    Raises:
        KeyError: If a column has no entry in the module-level ``hashs``.

    Note:
        Answer labels are taken from the module-level ``hashs`` mapping
        (``{column: {answer: code}}``), not from an argument.
    """
    for column in replaced_value.columns:
        # Create a pie chart other than a time stamp
        if column == "Time stamp":
            continue

        sizes = replaced_value.groupby(column).size()

        # Look each label up by its code instead of pairing labels and counts
        # by position, so slices stay correctly labelled even when some codes
        # do not occur in this frame. Unknown codes are shown as-is.
        code_to_label = {code: label for label, code in hashs[column].items()}
        labels = [code_to_label.get(code, code) for code in sizes.index]
        values = list(sizes)

        print(column)
        print(labels)
        print(values)

        _, ax = plt.subplots()
        ax.pie(values, labels=labels, autopct="%1.1f %%")
        plt.show()

def cross_tabulation(replaced_value):
    """Interactively build a cross tabulation of two questions.

    Prints the available column names, then reads two column names from
    standard input: the first becomes the rows of the table, the second the
    columns. Returns the count table with an extra 'Total' column, with the
    numeric codes on both axes renamed back to the answer wording.

    Args:
        replaced_value: Quantified data frame (answers replaced by codes).

    Returns:
        The cross-tabulated DataFrame.

    Note:
        The code-to-wording lookup uses the module-level ``hashs`` mapping.
    """
    print(replaced_value.columns)
    print("Enter the question you want to be on the front side from the column name above")
    row_question = input()
    print("Enter the question you want to start from the column name above")
    col_question = input()

    # Invert {answer: code} into {code: answer} so the numeric axis labels
    # can be turned back into wording.
    col_labels = {code: answer for answer, code in hashs[col_question].items()}
    row_labels = {code: answer for answer, code in hashs[row_question].items()}

    # Counts per (row answer, column answer) pair; passing normalize=True to
    # crosstab would give proportions instead.
    table = pd.crosstab(replaced_value[row_question], replaced_value[col_question])

    # Add total column
    table['Total'] = table.sum(axis=1)

    # Rename index and columns from numbers to wording
    table = table.rename(index=row_labels, columns=col_labels)

    # Blank lines and a rule to set the result apart from the prompts.
    for _ in range(5):
        print("")
    print("-------------------------------------------------------------------------------------------------")
    return table

# Reference: https://deepage.net/features/pandas-crosstab.html
# Run the interactive cross tabulation on the quantified answers.
cross_tab = cross_tabulation(replaced_value)
# Bare expression: presumably left so a notebook cell displays the table.
cross_tab