[PYTHON] Praktisches Memo zum maschinellen Lernsystem

Dies sind die Referenzseiten und meine Notizen zu Kapitel 1, auf die ich mich beim Lesen und Durcharbeiten des Buches "Practical Machine Learning System" gestützt habe.

URL der Referenzsite für maschinelles Lernen

Memo "Praktisches maschinelles Lernsystem"

#Was ist NumPy?
#NumPy unterstützt große mehrdimensionale Arrays und Matrizen
#und bietet eine umfangreiche Bibliothek mathematischer Funktionen auf hoher Ebene, um diese zu bearbeiten.

http://rest-term.com/archives/2999/
http://wbhappy.hatenablog.jp/entry/2015/02/06/210000
# ndarray.flags    Speicherlayout-Informationen der Array-Daten
# ndarray.ndim     Anzahl der Dimensionen des Arrays
# ndarray.size     Anzahl der Elemente im Array
# ndarray.shape    Anzahl der Elemente in jeder Dimension
# ndarray.itemsize Anzahl der Bytes pro Element
# ndarray.strides  Anzahl der Bytes, die erforderlich sind, um zum nächsten Element in jeder Dimension zu gelangen
# ndarray.nbytes   Anzahl der Bytes im gesamten Array
# ndarray.dtype    Datentyp der Array-Elemente (numpy.dtype)

>>> import numpy as np
>>> np.version.full_version
'1.8.0rc1'
>>> a = np.array([0,1,2,3,4,5])
>>> a
array([0, 1, 2, 3, 4, 5])
>>> a.ndim
1
>>> a.shape
(6,)
>>> b = a.reshape((3,2))
>>> b
array([[0, 1],
       [2, 3],
       [4, 5]])
>>> b.ndim
2
>>> b.shape
(3, 2)
>>> b[1][0] = 77
>>> b
array([[ 0,  1],
       [77,  3],
       [ 4,  5]])
>>> a
array([ 0,  1, 77,  3,  4,  5])
>>> c = a.reshape((3,2)).copy()
>>> c
array([[ 0,  1],
       [77,  3],
       [ 4,  5]])
>>> c[0][0] = -99
>>> a
array([ 0,  1, 77,  3,  4,  5])
>>> c
array([[-99,   1],
       [ 77,   3],
       [  4,   5]])
>>> a*2
array([  0,   2, 154,   6,   8,  10])
>>> a**2
array([   0,    1, 5929,    9,   16,   25])
>>> a[np.array([2,3,4])]
array([77,  3,  4])
>>> a>4
array([False, False,  True, False, False,  True], dtype=bool)
>>> a[a>4]
array([77,  5])
>>> a[a>4]=4
>>> a
array([0, 1, 4, 3, 4, 4])
>>> a.clip(0,4)
array([0, 1, 4, 3, 4, 4])
>>> c = np.array([1,2,np.NAN, 3,4]) #Angenommen, aus einer Textdatei gelesen
>>> c
array([  1.,   2.,  nan,   3.,   4.])
>>> np.isnan(c) #Fehlende Werte (NaN) erkennen
array([False, False,  True, False, False], dtype=bool)
>>> c[~np.isnan(c)]
array([ 1.,  2.,  3.,  4.])
>>> np.mean(c[~np.isnan(c)])
2.5
>>> import timeit
>>> normal_py_sec = timeit.timeit('sum(x*x for x in xrange(1000))',number=10000)
>>> Naive_np_sec = timeit.timeit('sum(na*na)',setup="import numpy as np; na=np.arange(1000)", number=10000)
>>> good_np_sec = timeit.timeit('na.dot(na)',setup="import numpy as np;  na=np.arange(1000)", number=10000)
>>> print("Normal Python: %f sec"%normal_py_sec)
Normal Python: 0.836571 sec
>>> print("Naive Numpy: %f sec"%Naive_np_sec)
Naive Numpy: 4.806356 sec
>>> print("Good Numpy: %f sec"%good_np_sec)
Good Numpy: 0.039245 sec
>>> a = np.array([1,2,3])
>>> a.dtype
dtype('int64')
>>> np.array([1, "stringry"])
array(['1', 'stringry'],
      dtype='|S8')
>>> np.array([1, "stringy", set([1,2,3])])
array([1, 'stringy', set([1, 2, 3])], dtype=object)

# Aus http://rest-term.com/archives/2999/

>>> import numpy as np
>>> a = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]]) #Array erzeugen
>>> a
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])
>>> a.flags     #Speicherlayoutinformationen für Arraydaten
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
>>> a.ndim      #Anzahl der Dimensionen
2
>>> a.size      #Elementanzahl
12
>>> a.shape     #Anzahl der Elemente in jeder Dimension(Anzahl der Zeilen,Anzahl der Spalten)
(4, 3)
>>> a.itemsize  #Anzahl der Bytes pro Element
8
>>> a.strides   #24 Bytes für die nächste Zeile, 8 Bytes für die nächste Spalte
(24, 8)
>>> a.nbytes    #Anzahl der Bytes im gesamten Array
96
>>> a.dtype     #Elementdatentyp
dtype('int64')

>>> np.zeros(5)
array([ 0.,  0.,  0.,  0.,  0.])
>>> np.ones(5)
array([ 1.,  1.,  1.,  1.,  1.])
>>> np.ones([2,3])
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])
>>> np.identity(3)                       #Einheitsmatrix
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])
>>> np.eye(3)                            #Einheitsmatrix, die die Anzahl der Spalten angeben kann
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])
>>> np.arange(10)                        #Wie range()
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> np.arange(1,2,0.2)                   #Startpunkt, Endpunkt, Schrittweite
array([ 1. ,  1.2,  1.4,  1.6,  1.8])
>>> np.linspace(1,4,6)                   #Gleichmäßig verteilte Werte mit Angabe der Anzahl der Elemente
array([ 1. ,  1.6,  2.2,  2.8,  3.4,  4. ])
>>> np.logspace(2,3,4)                   #Logarithmisch verteilte Werte (Basis 10)
array([  100.        ,   215.443469  ,   464.15888336,  1000.        ])
>>> np.logspace(2,4,4, base=2)           #Basis 2
array([  4.        ,   6.34960421,  10.0793684 ,  16.        ])
>>> np.tile([0,1,2,3,4], 2)              #Gibt ein Array zurück, in dem die Elemente wiederholt werden
array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
>>> a,b = np.meshgrid([1,2,3],[4,5,6,7]) #Eine gitterartige Anordnung, die vertikal und horizontal gleichmäßig verteilt ist
>>> a
array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])
>>> b
array([[4, 4, 4],
       [5, 5, 5],
       [6, 6, 6],
       [7, 7, 7]])
>>> np.tri(3) #Dreiecksmatrix
array([[ 1.,  0.,  0.],
       [ 1.,  1.,  0.],
       [ 1.,  1.,  1.]])
>>> a = np.array([[0,1,2],[3,4,5],[6,7,8]])
>>> a
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
>>> np.diag(a)                  #Ein Array von diagonalen Elementen, die aus dem Eingabearray extrahiert wurden
array([0, 4, 8])
>>> np.empty(5)                 #Reserviert nur den Speicherbereich, ohne ihn zu initialisieren
array([  0.00000000e+000,   4.94065646e-324,   9.88131292e-324,
         1.48219694e-323,   1.97626258e-323])
>>> a = np.array([1,2,3])
>>> b = a.copy()
>>> b
array([1, 2, 3])
>>> np.random.randint(0,100,10) #Zufallszahlen erzeugen; anzugeben sind (Mindestwert, Maximalwert, Elementanzahl)
array([67, 65, 61, 15, 48, 57, 42, 21, 49, 57])
>>> a = np.arange(10)
>>> a
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> a = np.arange(10)
>>> b = a.reshape((2,5))        #Änderung der Arrayform(In diesem Fall zu einem zweidimensionalen Array)
>>> b
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
>>> a.resize((2,5))             #Ändert das Array selbst in ein zweidimensionales Array mit der gleichen Anzahl von Elementen
>>> a
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
>>> a = np.tile(np.arange(3),3) #range(3) dreimal hintereinander wiederholt
>>> a
array([0, 1, 2, 0, 1, 2, 0, 1, 2])
>>> np.argmax(a)                #Der kleinste Index der Maximalwertelemente
2
>>> np.argmin(a)                #Der kleinste Index des minimalen Elements
0
>>> a = np.eye(3)               #3x3-Einheitsmatrix
>>> a
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])
>>> np.nonzero(a)               #Gibt ein Indexarray von Nicht-Null-Elementen zurück(In diesem Fall handelt es sich um ein zweidimensionales Array, also um zwei)
(array([0, 1, 2]), array([0, 1, 2]))
>>> a = np.arange(15).reshape((3,5))
>>> a
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
>>> np.where(a%2==0)            #Indexarray von Elementen, die den Bedingungen entsprechen
(array([0, 0, 0, 1, 1, 2, 2, 2]), array([0, 2, 4, 1, 3, 0, 2, 4]))
>>> a = np.arange(10)
>>> np.select([a<3, a>5],[a, a**2]) #Erstes Argument für die Suche nach mehreren Bedingungen:Array von Bedingungen Zweites Argument:Ein Array von Werten, die im Index des Elements festgelegt werden sollen, das der Bedingung entspricht
array([ 0,  1,  2,  0,  0,  0, 36, 49, 64, 81])
>>> a = np.arange(9).reshape((3,3))
>>> a
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
>>> b = np.arange(8,-1,-1).reshape((3,3))
>>> b
array([[8, 7, 6],
       [5, 4, 3],
       [2, 1, 0]])
>>> np.dstack((a,b))            #Kombinieren Sie zweidimensionale Arrays zu einem dreidimensionalen Array
array([[[0, 8],
        [1, 7],
        [2, 6]],

       [[3, 5],
        [4, 4],
        [5, 3]],

       [[6, 2],
        [7, 1],
        [8, 0]]])
>>> np.hstack((a,b))            #In Spaltenrichtung verbinden
array([[0, 1, 2, 8, 7, 6],
       [3, 4, 5, 5, 4, 3],
       [6, 7, 8, 2, 1, 0]])
>>> np.vstack((a,b))            #In Reihenrichtung verbinden
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [8, 7, 6],
       [5, 4, 3],
       [2, 1, 0]])
>>> a = np.arange(16).reshape(2,2,4)
>>> a
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])
>>> np.dsplit(a,2)              #Teilen Sie ein dreidimensionales Array
[array([[[ 0,  1],
        [ 4,  5]],

       [[ 8,  9],
        [12, 13]]]), array([[[ 2,  3],
        [ 6,  7]],

       [[10, 11],
        [14, 15]]])]
>>> a = np.arange(16).reshape(4,4)
>>> a
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
>>> np.hsplit(a,2)              #In Spaltenrichtung teilen
[array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])]
>>> np.vsplit(a,2)              #In Zeilenrichtung teilen
[array([[0, 1, 2, 3],
       [4, 5, 6, 7]]), array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])]
>>> a = np.array([[1,2],[3,4]])
>>> a
array([[1, 2],
       [3, 4]])
>>> np.transpose(a)             #Transponierte Matrix
array([[1, 3],
       [2, 4]])
>>> a = np.array([[1,2,3]])
>>> np.swapaxes(a,0,1)          #Achsen vertauschen
array([[1],
       [2],
       [3]])
>>> a = np.random.randint(0,500,20)
>>> a
array([444,  97, 324, 492, 275,  95, 157, 336,  51, 249, 363, 409, 299,
       432,  41, 469, 201, 308,  85, 455])
>>> np.amax(a)                  #Maximalwert
492
>>> np.amin(a)                  #Mindestwert
41
>>> np.ptp(a)                   #Wertebereich(Maximalwert-Mindestwert)
451
>>> np.mean(a)                  #Arithmetischer Durchschnitt
279.10000000000002
>>> np.median(a)                #Median
303.5
>>> np.std(a)                   #Standardabweichung
146.4031761950539
>>> np.var(a)                   #Varianz
21433.889999999999
>>> b = np.random.randint(0,500,20)
>>> b
array([375, 207, 495, 320, 472, 481, 491, 133, 279, 480, 232, 261, 492,
       183, 168, 424,  95, 236, 176, 332])
>>> np.corrcoef(a,b)            #Korrelationskoeffizient
array([[ 1.        ,  0.12452095],
       [ 0.12452095,  1.        ]])
>>> c = np.random.randint(0,10,20)
>>> c
array([6, 5, 9, 7, 9, 6, 4, 0, 1, 4, 6, 3, 2, 7, 9, 3, 4, 9, 4, 8])
>>> np.histogram(c)             #Histogramm
(array([1, 1, 1, 2, 4, 1, 3, 2, 1, 4]), array([ 0. ,  0.9,  1.8,  2.7,  3.6,  4.5,  5.4,  6.3,  7.2,  8.1,  9. ]))

#Was ist SciPy?
#Bietet viele Algorithmen
#  cluster :Hierarchisches Clustering/Vektorquantisierung/k-Means-Verfahren
#  constants :Physikalische und mathematische Konstanten
#  fftpack :Fourier-Transformation
#  integrate :Integration
#  interpolate :Interpolation(linear,kubisch etc.)
#  io :Dateneingabe / -ausgabe
#  linalg :Lineare-Algebra-Routinen mit BLAS- und LAPACK-Bibliotheken
#  maxentropy :Maximum-Entropie-Verfahren
#  ndimage :n-dimensionales Bildpaket
#  odr :Orthogonale Distanzregression
#  optimize :Optimierung
#  signal :Signalverarbeitung
#  sparse :Dünnbesetzte Matrizen
#  spatial :Räumliche Datenstrukturen und Algorithmen
#  special :Spezielle Funktionen wie Bessel-Funktionen oder Jacobi-Funktionen
#  stats :Statistik

>>> import scipy, numpy
>>> scipy.version.full_version
'0.13.0b1'
>>> scipy.dot is numpy.dot
True #Gleicher Namespace wie Numpy
>>> data = sp.genfromtxt("sample/ch01/data/web_traffic.tsv", delimiter='\t')
>>> print(data.shape)
(743, 2)
>>> print(data[:10])

>>> import scipy as sp
>>> data = sp.genfromtxt("./sample/ch01/data/web_traffic.tsv", delimiter="\t") #Daten gelesen
>>> print(data[:10])
 [  2.00000000e+00   1.65600000e+03]
 [  3.00000000e+00   1.38600000e+03]
 [  4.00000000e+00   1.36500000e+03]
 [  5.00000000e+00   1.48800000e+03]
 [  6.00000000e+00   1.33700000e+03]
 [  7.00000000e+00   1.88300000e+03]
 [  8.00000000e+00   2.28300000e+03]
 [  9.00000000e+00   1.33500000e+03]
 [  1.00000000e+01   1.02500000e+03]]
>>> print(data.shape)
(743, 2)
>>> x = data[:,0] #Verstrichene Zeit (Spalte 0 extrahieren)
>>> y = data[:,1] #Anzahl der Zugriffe
>>> sp.sum(sp.isnan(y)) #Anzahl der ungültigen Werte (NaN)
0
>>> x = x[~sp.isnan(y)] #Beseitigen Sie unangemessene Werte
>>> y = y[~sp.isnan(y)] #Beseitigen Sie unangemessene Werte
>>> import matplotlib.pyplot as plt #Streudiagramm
>>> plt.scatter(x,y)
<matplotlib.collections.PathCollection object at 0x11192e5d0>
>>> plt.title("Web traffic over the last month")
<matplotlib.text.Text object at 0x1118f7c90>
>>> plt.xlabel("Time")
<matplotlib.text.Text object at 0x111636090>
>>> plt.ylabel("Hits/hour")
<matplotlib.text.Text object at 0x111649fd0>
>>> plt.xticks([w*7*24 for w in range(10)], ['week %i' %w for w in range(10)])
([<matplotlib.axis.XTick object at 0x10e349710>, <matplotlib.axis.XTick object at 0x111653450>, <matplotlib.axis.XTick object at 0x11192edd0>, <matplotlib.axis.XTick object at 0x1119514d0>, <matplotlib.axis.XTick object at 0x111951c10>, <matplotlib.axis.XTick object at 0x113505390>, <matplotlib.axis.XTick object at 0x113505ad0>, <matplotlib.axis.XTick object at 0x11350e250>, <matplotlib.axis.XTick object at 0x11350e990>, <matplotlib.axis.XTick object at 0x11351a110>], <a list of 10 Text xticklabel objects>)
>>> plt.autoscale(tight=True)
>>> plt.grid()
>>> plt.show()
#Kurvenanpassung
# polyfit(x,y,n) :Funktion für die Regressionsanalyse (passt ein Polynom n-ten Grades an die beiden Variablen an)
#Regressionsanalyse...Ein Verfahren, um eine Vorhersageformel (Regressionsgerade) zu erhalten, mit der sich zukünftige Werte aus einer von zwei Variablen vorhersagen lassen, zwischen denen eine Korrelation oder ein Kausalzusammenhang angenommen wird.
>>> def error(f, x, y): #Fehler der Modellfunktion f (Summe der quadrierten Abweichungen)
...     return sp.sum((f(x)-y)**2)
>>> fp1, residuals, rank, sv, rcond = sp.polyfit(x, y, 1, full=True) #Mit polyfit die Modellkoeffizienten bestimmen, die y aus x nach der Methode der kleinsten Quadrate annähern
>>> print("Model parameters: %s" % fp1)
Model parameters: [    2.57152281  1002.10684085]
>>> print(residuals) #Residuen
[  3.19874315e+08]
>>> print(rank) #Rang der Matrix
2
>>> print(rcond) #Kehrwert der Konditionszahl
1.64979141459e-13
Referenz: http://ktadaki.hatenablog.com/entry/2015/10/29/155340
Ich habe Werte in 5 Variablen eingefügt, aber ich verwende nur das erste fp1. Der Inhalt von fp1 sieht so aus.
[   2.59619213  989.02487106]
Mit anderen Worten, ich habe diese Formel bekommen.
f(x)=2.59619213x+989.02487106
>>> f1 = sp.poly1d(fp1) #Modellfunktion erstellen
>>> print(error(f1,x,y))
319874314.777
>>> import matplotlib.pyplot as plt #Streudiagramm
>>> plt.scatter(x,y)
<matplotlib.collections.PathCollection object at 0x11192e5d0>
>>> plt.title("Web traffic over the last month")
<matplotlib.text.Text object at 0x1118f7c90>
>>> plt.xlabel("Time")
<matplotlib.text.Text object at 0x111636090>
>>> plt.ylabel("Hits/hour")
<matplotlib.text.Text object at 0x111649fd0>
>>> plt.xticks([w*7*24 for w in range(10)], ['week %i' %w for w in range(10)]) #Schreiben Sie die x-Achsen-Skala neu. Geben Sie im Argument "wo" und "was" an, die in einer Liste angezeigt werden sollen.
([<matplotlib.axis.XTick object at 0x10e349710>, <matplotlib.axis.XTick object at 0x111653450>, <matplotlib.axis.XTick object at 0x11192edd0>, <matplotlib.axis.XTick object at 0x1119514d0>, <matplotlib.axis.XTick object at 0x111951c10>, <matplotlib.axis.XTick object at 0x113505390>, <matplotlib.axis.XTick object at 0x113505ad0>, <matplotlib.axis.XTick object at 0x11350e250>, <matplotlib.axis.XTick object at 0x11350e990>, <matplotlib.axis.XTick object at 0x11351a110>], <a list of 10 Text xticklabel objects>)
>>> plt.autoscale(tight=True)
<matplotlib.legend.Legend object at 0x10c587ad0>
>>> fx = sp.linspace(0, x[-1], 1000) #Zum Plotten"x-Wert"Generieren Sie a
>>> plt.plot(fx, f1(fx), linewidth=4) #Zeichnen Sie die Liste als Grafik
[<matplotlib.lines.Line2D object at 0x10c587850>]
>>> plt.legend(["d=%i" % f1.order], loc="upper left") #Legende anzeigen
>>> plt.grid()
>>> plt.show()

>>> f2p = sp.polyfit(x, y, 2)
>>> print(f2p)
[  1.04688184e-02  -5.21727812e+00   1.96921629e+03]
>>> f2 = sp.poly1d(f2p)
>>> print(error(f2, x, y))
182006476.432
# f(x) = 0.0105322215 * x**2 - 5.26545650 * x + 1974.76802
>>> plt.plot(fx, f2(fx), linewidth=4)
#↑ In den vorherigen Plot einfügen
#Genauere Kurve, aber komplexere Funktion
#Mit Grad 3, 10 und 100 ausprobiert → Überanpassung (Overfitting)
#Mit Grad 1 ausprobiert → Unteranpassung (Underfitting)
#Die erste Gerade wird mit den Daten bis Woche 3,5 trainiert, die zweite Gerade mit den Daten danach
>>> inflection = 3.5*7*24 #Zeitpunkt des Änderungspunkts (in Stunden) berechnen
>>> xa = x[:inflection] #Datenpunkte vor dem Änderungspunkt
>>> ya = y[:inflection]
>>> xb = x[:inflection] #Nach dem Änderungspunkt (Hinweis: dafür müsste x[inflection:] stehen; wie hier geschrieben sind xb und xa identisch)
>>> yb = y[:inflection] #(Hinweis: entsprechend y[inflection:])
>>> fa = sp.poly1d(sp.polyfit(xa, ya, 1))
>>> fb = sp.poly1d(sp.polyfit(xb, yb, 1))
>>> fa_error = error(fa, xa, ya)
>>> fb_error = error(fb, xb, yb)
>>> print("Error inflection=%f" % (fa_error + fb_error))
Error inflection=218985429.871767
# plt.plot(fx, fa(fx), linewidth=4)
# plt.plot(fx, fb(fx), linewidth=4) ← Die Abbildung auf die gleiche Weise anzeigen
#Den Fehler auf den Testdaten für das Modell berechnen, das mit den Daten nach dem Änderungspunkt trainiert wurde
>>> frac = 0.3 #Anteil der für den Test verwendeten Daten
>>> split_idx = int(frac * len(xb))
>>> shuffled = sp.random.permutation(list(range(len(xb)))) #30 % aller Daten zufällig auswählen
>>> test = sorted(shuffled[:split_idx]) #Indexarray von Testdaten
>>> train = sorted(shuffled[split_idx:]) #Datenindex-Array für das Training
>>> #Trainiere mit den einzelnen Trainingsdaten
>>> fbt1 = sp.poly1d(sp.polyfit(xb[train], yb[train], 1))
>>> fbt2 = sp.poly1d(sp.polyfit(xb[train], yb[train], 2))
>>> fbt3 = sp.poly1d(sp.polyfit(xb[train], yb[train], 3))
>>> fbt10 = sp.poly1d(sp.polyfit(xb[train], yb[train], 10))
>>> fbt100 = sp.poly1d(sp.polyfit(xb[train], yb[train], 100))
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/lib/polynomial.py:579: RuntimeWarning: overflow encountered in multiply
  scale = NX.sqrt((lhs*lhs).sum(axis=0))
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/lib/polynomial.py:587: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
>>> #Mit den jeweiligen Testdaten bewerten
>>> for f in [fbt1, fbt2, fbt3, fbt10, fbt100]:
...     print("Error d=%i: %f"  % (f.order, error(f, xb[test], yb[test])))
...
Error d=1: 33618254.181783
Error d=2: 31298428.161162
Error d=3: 30849423.817712
Error d=10: 28969336.428648
Error d=55: 28919778.656526
#Wann werden voraussichtlich 100.000 Anfragen pro Stunde überschritten? → Lösung der quadratischen Gleichung finden
#Vom Polynom 100.000 subtrahieren, um ein neues Polynom zu erstellen, und dessen Nullstelle bestimmen
>>> print(fbt2)
          2
0.004136 x - 1.662 x + 1677
>>> print(fbt2-100000)
          2
0.004136 x - 1.662 x - 9.832e+04
>>> from scipy.optimize import fsolve
>>> reached_max = fsolve(fbt2-100000, 800)/(7*24)
>>> print("100,000 hits/hour expected at week %f" % reached_max[0])
100,000 hits/hour expected at week 30.241873

#Kapitel 2 P27
#Klassifikation/Überwachtes Lernen
#Iris-Datensatz
#Merkmale
#Dreieck:Setosa Kreis:Versicolor Kreuz:Virginica
>>> from matplotlib import pyplot as plt
>>> from sklearn.datasets import load_iris
>>> import numpy as np
>>> data = load_iris() #Daten mit der Funktion load_iris von sklearn laden
>>> features = data['data']
>>> feature_names = data['feature_names']
>>> target = data['target']
>>> target_names = data['target_names']
>>> labels = target_names[target] # ?
>>> for t,marker,c in zip(range(3), ">ox","rgb"):
...     plt.scatter(features[target == t,0],
...                 features[target == t,1],
...                 marker = marker,
...                 c = c) #Plotten Sie mit Markern unterschiedlicher Farben für jede Klasse
...
<matplotlib.collections.PathCollection object at 0x10a5ec668>
<matplotlib.collections.PathCollection object at 0x10a287208>
<matplotlib.collections.PathCollection object at 0x10a5fa908>
#Die "Blütenblattlänge" wird als drittes im Array gespeichert.
>>> plength = features[:, 2]
>>> is_setosa = (labels == 'setosa') #Generieren Sie ein boolesches Array, ob setosa oder nicht
>>> max_setosa = plength[is_setosa].max()
>>> min_non_setosa = plength[~is_setosa].min()
>>> print('Maximum of setosa: {0}.'.format(max_setosa))
Maximum of setosa: 1.9. #Maximale Blütenblattlänge->1.9
>>> print('Minimum of others: {0}.'.format(min_non_setosa))
Minimum of others: 3.0. #Minimale Blütenblattlänge->3.0
>>> def apply_model( example ):
...     if example[2] < 2:
...         print("Iris Setosa")
...     else:
...         print("Iris Virginica or Itis Versicolor")
#Die übrigen Iris-Arten bestmöglich voneinander unterscheiden
>>> features = features[~is_setosa]
>>> labels = labels[~is_setosa]
>>> virginica = (labels == 'virginica')
>>> best_acc = -1.0
>>> best_fi = -1.0
>>> best_t = -1.0
>>> for fi in range(features.shape[1]): #Schwellenwertkandidaten für jedes Merkmal erzeugen
...     thresh = features[:,fi].copy()
...     thresh.sort()
...     for t in thresh: #Bei allen Schwellenwerten testen
...         pred = (features[:,fi] > t)
...         acc = (labels[pred] == 'virginica').mean()
...         if acc > best_acc:
...             best_acc = acc
...             best_fi  = fi
...             best_t   = t
>>> def apply_model( example ):
...     if(example[best_fi] > best_t):
...         print("virginica")
...     else:
...         print("virsicolor")
# heldout.py ausführen
# python3 ./sample/ch02/heldout.py
>>> from threshold import learn_model, apply_model, accuracy
>>> for ei in range(len(features)): #Alle Daten außer dem ei-ten Element werden zum Training verwendet
...     training = np.ones(len(features), bool)
...     training[ei] = False
...     testing = ~training
...     model = learn_model(features[training], virginica[training])
...     predictions = apply_model(features[testing], virginica[testing], model)
...     error += np.sum(predictions != virginica[testing])