2. Multivariate analysis spelled out in Python 6-3. Ridge regression / Lasso regression (scikit-learn) [How regularization works]

⑴ Import libraries

#Data processing / calculation / analysis library
import numpy as np
import pandas as pd

#Graph drawing library
import matplotlib.pyplot as plt
%matplotlib inline

#Machine learning library
import sklearn
from sklearn.linear_model import Ridge, Lasso #Class for regression model generation
#Module to make matplotlib support Japanese display
!pip install japanize-matplotlib
import japanize_matplotlib

⑵ Data acquisition and reading

#Location of the sample CSV (raw file served from GitHub)
url = 'https://raw.githubusercontent.com/yumi-ito/sample_data/master/ridge_lasso_50variables.csv'

#Load the CSV straight from the URL into a DataFrame
df = pd.read_csv(url)

#Objective variable y: the "y" column on its own
y = df['y']

#Explanatory variables x: every remaining column once "y" is dropped
x = df.drop(columns='y')

⑶ Generation of regularization parameter λ

#Generate 50 candidate values of λ (alpha), evenly spaced on a log10 scale
#from 10**-2 up to 10**0.7
num_alphas = 50
alphas = np.logspace(start=-2, stop=0.7, num=num_alphas)





Logarithmic scale


⑷ Estimate by ridge regression

#List that accumulates one coefficient vector per alpha
ridge_coefs = []

#Repeat the estimation of ridge regression while exchanging alpha
for a in alphas:
    #No intercept term is fitted (fit_intercept=False)
    ridge = Ridge(alpha=a, fit_intercept=False)
    ridge.fit(x, y)
    #Store the fitted coefficients; without this the list stays empty and
    #the array below has shape (0,), which breaks the plot that follows
    ridge_coefs.append(ridge.coef_)

#Convert the accumulated regression coefficients to a numpy array
#(one row per alpha, one column per explanatory variable)
ridge_coefs = np.array(ridge_coefs)

print("Array shape:", ridge_coefs.shape)


#Convert alphas to -log10 so that smaller alpha (weaker regularization)
#lies further to the right on the x-axis
log_alphas = -np.log10(alphas)

#Specifying the size of the graph area
plt.figure(figsize=(8, 6))

#Line graph with -log10(λ) on the x-axis and coefficients on the y-axis
#(matplotlib draws one line per column of ridge_coefs)
plt.plot(log_alphas, ridge_coefs)

#Label the x_1 curve just to the right of the lines, at the height of the
#first column's coefficient for the first (smallest) alpha
plt.text(max(log_alphas) + 0.1, ridge_coefs[0, 0], "x_1", fontsize=13)

#Specify x-axis range, leaving room on the right for the text label
plt.xlim([min(log_alphas) - 0.1, max(log_alphas) + 0.3])

#Axis label
plt.xlabel("Regularization parameter λ(-log10)", fontsize=13)
plt.ylabel("Regression coefficient", fontsize=13)

#Grid lines (the original "Scale line" step had no statement under it)
plt.grid()


⑸ Estimate by lasso regression

#List that accumulates one coefficient vector per alpha
lasso_coefs = []

#Repeat the estimation of the lasso regression while exchanging alpha
for a in alphas:
    #No intercept term is fitted (fit_intercept=False)
    lasso = Lasso(alpha=a, fit_intercept=False)
    lasso.fit(x, y)
    #Store the fitted coefficients; without this the list stays empty and
    #the array below has shape (0,), which breaks the plot that follows
    lasso_coefs.append(lasso.coef_)

#Convert the accumulated regression coefficients to a numpy array
#(one row per alpha, one column per explanatory variable)
lasso_coefs = np.array(lasso_coefs)

print("Array shape:", lasso_coefs.shape)


#Specifying the size of the graph area
plt.figure(figsize=(8, 6))

#Line graph with -log10(λ) on the x-axis and coefficients on the y-axis
#(matplotlib draws one line per column of lasso_coefs; log_alphas is the
#-log10(alphas) array computed for the ridge plot)
plt.plot(log_alphas, lasso_coefs)

#Label the x_1 curve just to the right of the lines, at the height of the
#first column's coefficient for the first alpha
plt.text(max(log_alphas) + 0.1, lasso_coefs[0, 0], "x_1", fontsize=13)

#Specify x-axis range, leaving room on the right for the text label
plt.xlim([min(log_alphas) - 0.1, max(log_alphas) + 0.3])

#Axis label
plt.xlabel("Regularization parameter λ(-log10)", fontsize=13)
plt.ylabel("Regression coefficient", fontsize=13)

#Grid lines (the original "Scale line" step had no statement under it)
plt.grid()



