Recap of the previous article

Last time, we read the CSV file and drew a scatter plot. The completed code and figure look like this:

import numpy as np
import matplotlib.pyplot as plt

# Load the comma-separated sample file into a float array.
data_set = np.loadtxt(fname="sampleData.csv", dtype="float", delimiter=",")

# Draw the scatter plot with plt.scatter(x coordinate value, y coordinate value).
# The first column supplies x and the second column supplies y; plt.scatter
# is called once for every row of the file.
for x_val, y_val in zip(data_set[:, 0], data_set[:, 1]):
    plt.scatter(x_val, y_val)

# Decorate the figure: title, axis labels and grid.
plt.title("correlation")
plt.xlabel("Average Temperature of SAITAMA")
plt.ylabel("Average Temperature of IWATE")
plt.grid()

# Display the finished figure.
plt.show()

What to do this time

Use scikit-learn to do a linear regression and draw a regression line

Procedure

1. Extract the x-coordinate and y-coordinate data from the CSV

# Store the x and y data in separate arrays.
# Column slicing extracts each coordinate in a single step. It replaces the
# earlier approach of seeding each array with a dummy element, growing it with
# np.append once per row (np.append returns a new copy of the array on every
# call) and finally deleting the dummy element.
x = data_set[:, 0]  # first CSV column: x coordinates
y = data_set[:, 1]  # second CSV column: y coordinates

2. Feed the x and y extracted in step 1 into a model that performs linear regression. 3. Build a straight line by making predictions with the model fitted in step 2. 4. Draw it with matplotlib.

What is scikit-learn

Roughly speaking, it is a module that performs regression and classification. Click here for details → Official page

Code using a linear regression model

#Import module for linear regression
from sklearn.linear_model import LinearRegression

# x coordinates of the regression line: use numpy's linspace to prepare
# 100 evenly spaced values from -10 to 40.
line_x = np.linspace(-10, 40, 100)

# Each 1-D array is reshaped into an (n, 1) column so that its shape matches
# the arguments passed to fit() and predict().
x_column = x.reshape(-1, 1)
y_column = y.reshape(-1, 1)
line_x_column = line_x.reshape(-1, 1)

# Find the prediction formula with scikit-learn's least-squares model:
# put the data into the model, then predict y along line_x.
model = LinearRegression().fit(x_column, y_column)
model_y = model.predict(line_x_column)

# Draw the regression line.
plt.plot(line_x, model_y, color="yellow")

In the line model = model.fit(x.reshape(-1,1), y.reshape(-1,1)), the shape of each numpy array is changed to a single column so that it matches the argument the function expects. For more information, see here.

Here is the completed code and diagram

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Load the comma-separated sample file into a float array.
# ndmin=2 keeps the result two-dimensional even when the file holds a single
# row; without it such a file loads as a 1-D array and the per-row indexing
# below fails.
data_set = np.loadtxt(
    fname="sampleData.csv",
    dtype="float",
    delimiter=",",
    ndmin=2,
)

# Store the x and y data in separate arrays.
# Column slicing replaces the earlier approach of seeding each array with a
# dummy element, growing it with np.append once per row (a fresh copy on every
# call) and deleting the dummy element afterwards.
x = data_set[:, 0]  # first CSV column: x coordinates
y = data_set[:, 1]  # second CSV column: y coordinates


# Draw a scatter plot: one plt.scatter call per CSV row.
for data in data_set:
    plt.scatter(data[0], data[1])


# Find the prediction formula with scikit-learn's least-squares model.
# The arrays are reshaped into (n, 1) columns to match the arguments passed to
# fit() and predict().
model = LinearRegression()
model = model.fit(x.reshape(-1, 1), y.reshape(-1, 1))
# x coordinates of the regression line: 100 evenly spaced values from -10 to 40.
line_x = np.linspace(-10, 40, 100)
model_y = model.predict(line_x.reshape(-1, 1))
plt.plot(line_x, model_y, color="yellow")

# Decorate the figure: title, axis labels and grid.
plt.title("correlation")
plt.xlabel("Average Temperature of SAITAMA")
plt.ylabel("Average Temperature of IWATE")
plt.grid()

# Display the finished figure.
plt.show()

Thank you for your hard work