Korrelationskoeffizientenkarten werden häufig in der Analyse von Atmosphären- und Ozeandaten gezeichnet. In Python haben Numpy und Scipy Funktionen zum Berechnen des Korrelationskoeffizienten, sodass Sie mit ihnen eine Karte des Korrelationskoeffizienten zeichnen können. Es gibt jedoch mehrere Möglichkeiten, dies zu tun. Daher werden wir hier ihre Leistung vergleichen.
def crr_loop_scipy(x1, x2):
ret = numpy.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj], _ = scipy.stats.pearsonr(x1[:,ii], x2[:,jj])
return ret
def crr_loop_numpy(x1, x2):
ret = numpy.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj] = numpy.corrcoef(x1[:,ii], x2[:,jj])[1,0]
return ret
def crr_numpy(x1, x2):
return numpy.corrcoef(x1, x2, rowvar=False)[0:x1.shape[1], x1.shape[1]:]
def crr_original(x1, x2):
crscov = np.dot(x1.T, x2) / x1.shape[0]
std1 = np.std(x1, axis=0).reshape(x1.shape[1], 1)
std2 = np.std(x2, axis=0).reshape(x2.shape[1], 1)
crsstd = np.dot(std1, std2.T)
return crscov / crsstd
def generate_data(nx):
nt = 1000
data1 = numpy.random.randn(nt, nx)
data2 = numpy.random.randn(nt, nx)
return data1, data2
python
import time
def measure_time(f, *args):
start = time.time()
f(*args)
return time.time() - start
def measure_mean_time(f, *args):
n = 2
return numpy.mean([measure_time(f, *args) for i in xrange(n)])
Ergebnis ist,
War überwältigend schnell. Da scipy schneller als numpy ist, kann es auch besser sein, scipy zu verwenden, selbst wenn der Zweck nicht die Korrelationskoeffizientenkarte wie diesmal ist.
import numpy as np
from scipy import stats
import time
def crr_loop_scipy(x1, x2):
ret = np.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj], _ = stats.pearsonr(x1[:,ii], x2[:,jj])
return ret
def crr_loop_numpy(x1, x2):
ret = np.empty([x1.shape[1], x2.shape[1]])
for jj in xrange(ret.shape[1]):
for ii in xrange(ret.shape[0]):
ret[ii,jj] = np.corrcoef(x1[:,ii], x2[:,jj])[1,0]
return ret
def crr_numpy(x1, x2):
return np.corrcoef(x1, x2, rowvar=False)[0:x1.shape[1], x1.shape[1]:]
def crr_original(x1, x2):
crscov = np.dot(x1.T, x2) / x1.shape[0]
std1 = np.std(x1, axis=0).reshape(x1.shape[1], 1)
std2 = np.std(x2, axis=0).reshape(x2.shape[1], 1)
crsstd = np.dot(std1, std2.T)
return crscov / crsstd
def generate_data(nx):
nt = 1000
data1 = np.random.randn(nt, nx)
data2 = np.random.randn(nt, nx)
return data1, data2
def measure_time(f, *args):
start = time.time()
f(*args)
return time.time() - start
def measure_mean_time(f, *args):
n = 2
return np.mean([measure_time(f, *args) for i in xrange(n)])
def experiment(nxs, crrs):
mean_times = np.empty([len(crrs), len(nxs)])
for icrr, crr in enumerate(crrs):
for ix, nx in enumerate(nxs):
x1, x2 = generate_data(nx)
mean_times[icrr, ix] = measure_mean_time(crr, x1, x2)
return mean_times
if __name__ == '__main__:
import matplotlib.pyplot as plt
nxs1 = range(200, 200 + 1000, 200)
nxs2 = range(200, 200 + 10000, 600)
mean_times1 = experiment(nxs1, [crr_loop_scipy, crr_loop_numpy])
mean_times2 = experiment(nxs2, [crr_numpy, crr_original])
lebels1 = ['loop_sci', 'loop_np']
labels2 = ['np', 'original']
for label1, label2, mean_time1, mean_time2 in zip(lebels1, labels2, mean_times1, mean_times2):
plt.plot(nxs1, mean_time1, label=label1)
plt.plot(nxs2, mean_time2, label=label2)
plt.xlabel('nx')
plt.ylabel('time (sec)')
plt.title('nt=1000')
plt.xlim([0, nxs2[-1]])
plt.ylim([0, 80])
plt.legend()
plt.show()
Recommended Posts