I want to build a multivariable regression model, try out a few machine learning methods on it, and compare their accuracy.
The Python machine learning library scikit-learn has convenient implementations of the methods I need, so I gave it a quick try.
The following code is based on the SVR example in the scikit-learn Examples:
import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt
%matplotlib inline
#Generate input with random numbers
X = np.sort(5 * np.random.rand(40, 1), axis=0)
#The output is a sin function
y = np.sin(X).ravel()
#Add noise to the output
y[::5] += 3 * (0.5 - np.random.rand(8))
#RBF kernel, linear, polynomial fitting
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e3)
svr_poly = SVR(kernel='poly', C=1e3, degree=2)
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)
#Create a diagram
plt.figure(figsize=[10, 5])
plt.scatter(X, y, c='k', label='data')
plt.plot(X, y_rbf, c='g', label='RBF model')
plt.plot(X, y_lin, c='r', label='Linear model')
plt.plot(X, y_poly, c='b', label='Polynomial model')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Support Vector Regression')
plt.legend()
plt.show()
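Running this plots the noisy data together with the three fitted curves. One side note before extending it: the hyperparameters above (C=1e3, gamma=0.1) are copied straight from the documentation example. In practice it is better to search for them; here is a minimal sketch using GridSearchCV, reusing X, y, and SVR from the code above, with a small grid whose values are arbitrary choices for illustration:

from sklearn.model_selection import GridSearchCV

#Search a small, arbitrary grid of C and gamma values with 5-fold cross-validation
param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.01, 0.1, 1]}
search = GridSearchCV(SVR(kernel='rbf'), param_grid, cv=5)
search.fit(X, y)
print(search.best_params_)

The fitted search.best_estimator_ could then stand in for the hand-set model.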
Next, I extended the example to four input variables:
import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt
#Generate four input variables with random numbers
X1 = np.sort(5 * np.random.rand(40))
X2 = np.sort(3 * np.random.rand(40))
X3 = np.sort(9 * np.random.rand(40))
X4 = np.sort(4 * np.random.rand(40))
#Stack the four inputs into one (40, 4) feature matrix
X = np.c_[X1, X2, X3, X4]
#Calculate the output from the four inputs
y = np.sin(X1) + np.cos(X2) + np.sin(X3) - np.cos(X4)
#Keep a noise-free copy of the output
y_o = y.copy()
#Add noise to every fifth sample (40 samples, so 8 noise values)
y[::5] += 3 * (0.5 - np.random.rand(8))
#Fit the three SVR models
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e3)
svr_poly = SVR(kernel='poly', C=1e3, degree=3)
y_rbf = svr_rbf.fit(X, y).predict(X)
y_lin = svr_lin.fit(X, y).predict(X)
y_poly = svr_poly.fit(X, y).predict(X)
#Prepare test data from the same distributions as the training data
test_X1 = np.sort(5 * np.random.rand(40))
test_X2 = np.sort(3 * np.random.rand(40))
test_X3 = np.sort(9 * np.random.rand(40))
test_X4 = np.sort(4 * np.random.rand(40))
test_X = np.c_[test_X1, test_X2, test_X3, test_X4]
test_y = np.sin(test_X1) + np.cos(test_X2) + np.sin(test_X3) - np.cos(test_X4)
#Predict on the test data with the fitted models
test_rbf = svr_rbf.predict(test_X)
test_lin = svr_lin.predict(test_X)
test_poly = svr_poly.predict(test_X)
Next, verify the accuracy against the test data.
from sklearn.metrics import mean_squared_error
from math import sqrt
#Correlation coefficient calculation
rbf_corr = np.corrcoef(test_y, test_rbf)[0, 1]
lin_corr = np.corrcoef(test_y, test_lin)[0, 1]
poly_corr = np.corrcoef(test_y, test_poly)[0, 1]
#Calculate RMSE
rbf_rmse = sqrt(mean_squared_error(test_y, test_rbf))
lin_rmse = sqrt(mean_squared_error(test_y, test_lin))
poly_rmse = sqrt(mean_squared_error(test_y, test_poly))
print "RBF: RMSE %f \t\t Corr %f" % (rbf_rmse, rbf_corr)
print "Linear: RMSE %f \t Corr %f" % (lin_rmse, lin_corr)
print "Poly: RMSE %f \t\t Corr %f" % (poly_rmse, poly_corr)
I got the following result:
RBF: RMSE 0.707305 Corr 0.748894
Linear: RMSE 0.826913 Corr 0.389720
Poly: RMSE 2.913726 Corr -0.614328
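One caveat about these numbers: SVR is sensitive to feature scale, and the four inputs live on different ranges (roughly 0 to 5, 0 to 3, 0 to 9, and 0 to 4). Below is a minimal sketch of standardizing the features with a Pipeline, reusing X, y, test_X, test_y, SVR, sqrt, and mean_squared_error from the code above; whether this actually improves the scores here is an assumption I have not verified:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

#Standardize each feature to zero mean and unit variance before the SVR
scaled_rbf = make_pipeline(StandardScaler(), SVR(kernel='rbf', C=1e3, gamma=0.1))
scaled_rbf.fit(X, y)
scaled_pred = scaled_rbf.predict(test_X)
print("Scaled RBF: RMSE %f" % sqrt(mean_squared_error(test_y, scaled_pred)))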