# Draw one scatter panel per target correlation coefficient, side by side.
lst = [-1, -0.7, -0.3, 0, 0.3, 0.7, 1]
n_panels = len(lst)
fig, ax = plt.subplots(1, n_panels, figsize=(10 * n_panels, 10))

# Every panel hides its ticks and tick labels; only the point cloud matters.
hidden_ticks = dict(bottom=False, left=False, labelbottom=False, labelleft=False)

for idx, corrcoef in enumerate(lst):
    panel = ax[idx]

    # Zero mean and unit variances, with `corrcoef` as the off-diagonal
    # covariance entry.
    mean = np.array([0, 0])
    cov = np.array([[1, corrcoef], [corrcoef, 1]])

    # 5000 bivariate normal samples, split into their two coordinates.
    samples = np.random.multivariate_normal(mean, cov, 5000)
    x, y = samples[:, 0], samples[:, 1]

    panel.scatter(x, y, color='royalblue')
    panel.set_title(f'corrcoef = {corrcoef:.2f}', size=50)
    panel.tick_params(**hidden_ticks)
# Scatter plot of a cubic (non-linear) relationship, titled with its
# correlation coefficient.
x = np.random.randint(1, 1000, 1000)
cubic_term = (x - 400) ** 3
quadratic_term = 100 * (x - 200) ** 2
y = (cubic_term - quadratic_term + 100000000) / 1000000

# np.corrcoef returns the correlation matrix; entry [0, 1] is the x-y value.
corr_matrix = np.corrcoef(x, y)
corr_coef = corr_matrix[0, 1]

fig, ax = plt.subplots()
ax.scatter(x, y, color='royalblue')
ax.set_title(f'corr={corr_coef:.3f}', size=18)
# Hide ticks and tick labels on both axes.
ax.tick_params(
    bottom=False,
    left=False,
    labelbottom=False,
    labelleft=False,
)
# Scatter plot of a parabola centred on x = 500, titled with its
# correlation coefficient.
x = np.random.randint(1, 1000, 100)
offset_from_vertex = x - 500
y = offset_from_vertex ** 2 / 100 + 300

# Off-diagonal entry of the correlation matrix is the x-y coefficient.
corr_coef = np.corrcoef(x, y)[0, 1]

fig, ax = plt.subplots()
ax.scatter(x, y, color='royalblue')
ax.set_title(f'corr={corr_coef:.3f}', size=18)
# Hide ticks and tick labels on both axes.
ax.tick_params(
    bottom=False,
    left=False,
    labelbottom=False,
    labelleft=False,
)
Betrachtet man die gesamten Daten (linke Abbildung), ist der Korrelationskoeffizient niedrig. Wird der Datenbereich jedoch auf "x = 900" oder mehr eingegrenzt (rechte Abbildung), zeigt sich eine hohe Korrelation.
# Two panels: the right one shows only data with x in [900, 1000), the left
# one shows the same data plus three extra points at x = 600, 700 and 800.
x = np.random.randint(900, 1000, 1000)
noise = np.random.randn(1000)
y = x + 10 * noise

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
left_panel, right_panel = ax[0], ax[1]
hidden_ticks = dict(bottom=False, left=False, labelbottom=False, labelleft=False)

# Right panel: restricted range only.
corr_coef = np.corrcoef(x, y)[0, 1]
right_panel.scatter(x, y, color='royalblue')
right_panel.set_title(f'corr={corr_coef:.3f}', size=18)
right_panel.tick_params(**hidden_ticks)

# Left panel: restricted-range data extended by three points outside it.
x2 = [*x, 600, 700, 800]
y2 = [*y, 2000, 1800, 1500]
corr_coef = np.corrcoef(x2, y2)[0, 1]
left_panel.scatter(x2, y2, color='royalblue')
left_panel.set_title(f'corr={corr_coef:.3f}', size=18)
left_panel.tick_params(**hidden_ticks)
import numpy as np
import matplotlib.pyplot as plt
# Fit a straight line to six sample points and draw it over the range 0..8.
x = np.array([2.0, 3.5, 4.0, 4.5, 5.0, 5.5])
y = np.array([3.0, 3.2, 3.9, 5.2, 8.4, 10.5])
xp = np.linspace(0, 8, 100)

for val in range(1, 2):
    # Least-squares polynomial of degree `val`, wrapped so it can be called.
    fx = np.poly1d(np.polyfit(x, y, val))

    fig, ax = plt.subplots()
    ax.plot(xp, fx(xp), '-', color='blue')
    ax.scatter(x, y, color='deepskyblue', s=32)

    # '+' in the format spec always prints the intercept's sign, so the
    # equation stays readable whether the intercept is negative or positive.
    ax.text(
        0.05,
        0.8,
        s=f'y = {fx.coef[0]:.2f} x {fx.coef[1]:+.2f}',
        size='x-large',
        transform=ax.transAxes,
    )

    # Horizontal reference line at y = 0 (axhline takes a scalar position).
    ax.axhline(0, color='black')
    ax.set_xlim(0, None)
    ax.set_ylim(-3, 14)
    ax.set_ylabel('Cost [JPY]')
    ax.set_xlabel('Explanatory variables')
import numpy as np
import matplotlib.pyplot as plt
# Fit a straight line to six sample points, then draw it solid inside the
# observed x range (2..5.5) and dashed red outside it (0..2 and 5.5..8).
x = np.array([2.0, 3.5, 4.0, 4.5, 5.0, 5.5])
y = np.array([3.0, 3.2, 3.9, 5.2, 8.4, 10.5])
xp = np.linspace(2, 5.5, 100)
xp1 = np.linspace(0, 2, 100)
xp2 = np.linspace(5.5, 8, 100)

for val in range(1, 2):
    # Least-squares polynomial of degree `val`, wrapped so it can be called.
    fx = np.poly1d(np.polyfit(x, y, val))

    fig, ax = plt.subplots()
    ax.plot(xp, fx(xp), '-', color='blue')

    # Only the `linestyle` keyword is given for the dashed segments: passing a
    # '-' format string as well defines the line style twice and makes
    # matplotlib warn about the redundant definition.
    for outside_range in (xp1, xp2):
        ax.plot(outside_range, fx(outside_range), color='red', linestyle='dashed')

    ax.scatter(x, y, color='deepskyblue', s=32)

    # '+' in the format spec always prints the intercept's sign, so the
    # equation stays readable whether the intercept is negative or positive.
    ax.text(
        0.05,
        0.8,
        s=f'y = {fx.coef[0]:.2f} x {fx.coef[1]:+.2f}',
        size='x-large',
        transform=ax.transAxes,
    )

    # Reference lines: y = 0, plus the two edges of the observed x range.
    # axhline/axvline take scalar positions.
    ax.axhline(0, color='black')
    ax.axvline(2, color='gray', linestyle='dotted')
    ax.axvline(5.5, color='gray', linestyle='dotted')

    ax.set_xlim(0, 8)
    ax.set_ylim(-3, 14)
    ax.set_ylabel('Cost [JPY]')
    ax.set_xlabel('Explanatory variables')
Wenn Sie den Grad des Polynoms oder die Anzahl der erklärenden Variablen erhöhen, passt sich das Modell zwar besser an die Trainingsdaten an, aber die Genauigkeit der Vorhersage unbekannter Daten nimmt ab. Die Abbildung zeigt den Fall, in dem der Grad erhöht wird.
import numpy as np
import matplotlib.pyplot as plt
# Fit polynomials of degree 2 through 5 to the same six points and draw each
# fit in its own figure over the range 0..8.
x = np.array([2.0, 3.5, 4.0, 4.5, 5.0, 5.5])
y = np.array([3.0, 3.2, 3.9, 5.2, 8.4, 10.5])
xp = np.linspace(0, 8, 100)

for val in range(2, 6):
    # Least-squares polynomial of degree `val`, wrapped so it can be called.
    fx = np.poly1d(np.polyfit(x, y, val))

    fig, ax = plt.subplots()
    ax.plot(xp, fx(xp), '-', color='blue')
    ax.scatter(x, y, color='deepskyblue', s=32)

    # Horizontal reference line at y = 0 (axhline takes a scalar position).
    ax.axhline(0, color='black')
    ax.set_xlim(0, None)
    ax.set_ylim(-3, 14)
    ax.set_ylabel('Cost [JPY]')
    ax.set_xlabel('Explanatory variables')

    # Label each figure with the polynomial degree it shows.
    ax.text(
        0.75,
        0.85,
        s=f'Degree = {val}',
        size='x-large',
        transform=ax.transAxes,
    )
Recommended Posts