Last time, we read the CSV file and drew a scatter plot. The completed code and figure look like this:
import numpy as np
import matplotlib.pyplot as plt
# Read the comma-separated values in sampleData.csv as floats.
data_set = np.loadtxt("sampleData.csv", dtype="float", delimiter=",")

# Draw the scatter plot with plt.scatter(x value, y value).
# Each row is plotted with its own scatter() call: column 0 supplies the
# x value and column 1 the y value.
for row in data_set:
    plt.scatter(row[0], row[1])

# Decorate the figure, then display it.
plt.grid()
plt.xlabel("Average Temperature of SAITAMA")
plt.ylabel("Average Temperature of IWATE")
plt.title("correlation")
plt.show()
This time, we use scikit-learn
to perform a linear regression and draw the regression line.
1. Extract the x-coordinate and y-coordinate data from the CSV.
# Store the x and y data in separate arrays.
# Column slicing produces the same 1-D float arrays as appending the values
# row by row, without seeding each array with a dummy element that must be
# deleted afterwards and without re-copying the array on every append.
x = data_set[:, 0]  # column 0: x coordinates
y = data_set[:, 1]  # column 1: y coordinates
2. Feed the x and y extracted in step 1 into a model that performs linear regression. 3. Build the straight line by making predictions with the model fitted in step 2. 4. Draw it with matplotlib.
Roughly speaking, scikit-learn is a module that performs regression and classification. For details, see the official page.
#Import module for linear regression
from sklearn.linear_model import LinearRegression
# Find the prediction formula with scikit-learn's least-squares model.
# fit() is given the data as single-column 2-D arrays, hence reshape(-1, 1);
# it returns the fitted estimator, so construction and fitting are chained.
model = LinearRegression().fit(x.reshape(-1, 1), y.reshape(-1, 1))

# x coordinates of the regression line: 100 evenly spaced values
# from -10 to 40, generated with numpy's linspace.
line_x = np.linspace(-10, 40, 100)

# Predict y for every x on the line, then draw the regression line.
model_y = model.predict(line_x.reshape(-1, 1))
plt.plot(line_x, model_y, color='yellow')
In the line `model = model.fit(x.reshape(-1, 1), y.reshape(-1, 1))`,
the NumPy arrays are reshaped into single-column 2-D arrays to match what the function's arguments expect.
For more information, see here.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Read the comma-separated values in sampleData.csv as floats.
data_set = np.loadtxt(
    fname="sampleData.csv",
    dtype="float",
    delimiter=",",
)

# Store the x and y data in separate arrays.
# Column slicing produces the same 1-D float arrays as appending the values
# row by row, without a dummy first element to delete afterwards and without
# re-copying the array on every append.
x = data_set[:, 0]  # column 0: x coordinates
y = data_set[:, 1]  # column 1: y coordinates

# Draw the scatter plot, one scatter() call per row.
for data in data_set:
    plt.scatter(data[0], data[1])

# Find the prediction formula with scikit-learn's least-squares model.
# The 1-D arrays are reshaped into single-column 2-D arrays for fit().
model = LinearRegression()
model = model.fit(x.reshape(-1, 1), y.reshape(-1, 1))

# Regression line: predict y at 100 evenly spaced x values from -10 to 40.
line_x = np.linspace(-10, 40, 100)
model_y = model.predict(line_x.reshape(-1, 1))
plt.plot(line_x, model_y, color='yellow')

# Decorate the figure, then display it.
plt.title("correlation")
plt.xlabel("Average Temperature of SAITAMA")
plt.ylabel("Average Temperature of IWATE")
plt.grid()
plt.show()
Thank you for your hard work