[PYTHON] Machine learning template for handwritten digit data

Python textbook to acquire practical skills

Run `python3 digits.py ${fileName}` with an image of a handwritten digit, and the script will predict which digit it shows.

スクリーンショット 2017-05-27 9.36.24.png

`digits.py`


import os, sys, math
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, model_selection, svm, metrics
from sklearn.externals import joblib
from PIL import Image

# File name of the persisted (pickled) classifier.
DIGITS_PKL = "digit-clf.pkl"

# Load scikit-learn's bundled handwritten-digit dataset.
digits = datasets.load_digits()
# Hold-out split (not cross-validation): divide the samples into a training
# part and a test part. The seed is fixed because the trained model is cached
# in DIGITS_PKL and reused by later runs; with an unseeded split a later run
# would draw a different test set that overlaps the samples the cached model
# was trained on, which inflates the reported accuracy.
data_train, data_test, label_train, label_test = \
    model_selection.train_test_split(
        digits.data, digits.target, random_state=0)

#Create a predictive model
def create_model():
    """Train an SVM on the training split, save it to DIGITS_PKL, return it.

    Returns:
        The fitted svm.SVC classifier.
    """
    # Other estimators that can be swapped in here:
    #   svm.LinearSVC()
    #   sklearn.ensemble.RandomForestClassifier()
    model = svm.SVC(gamma=0.001)
    # Learn from the module-level training data.
    model.fit(data_train, label_train)
    # Write the fitted model to disk under DIGITS_PKL.
    joblib.dump(model, DIGITS_PKL)
    print("Saved the prediction model=", DIGITS_PKL)
    return model

#Select a forecast model
def select_model():
    """Return the classifier, loading it from DIGITS_PKL or training it.

    If the model file exists it is loaded from disk; otherwise a new model is
    trained (and saved) by create_model().

    Returns:
        A fitted classifier.
    """
    if os.path.exists(DIGITS_PKL):
        # NOTE: joblib files are pickle-based, so only load files you trust.
        return joblib.load(DIGITS_PKL)
    # No saved model yet. create_model() already returns the fitted
    # classifier after saving it, so reading the file back is unnecessary.
    return create_model()

#Predict numbers from data
def predict_digits(data,clf):
    """Print the label that clf predicts for one sample.

    Args:
        data: A single feature vector.
        clf: An object with a predict() method.
    """
    # predict() is given a batch, so wrap the single sample in a list.
    batch = [data]
    predicted = clf.predict(batch)
    print("judgment result=", predicted)

#Convert handwritten digit images to 8x8 grayscale data array
def image_to_data(imagefile):
    """Convert an image file into a flat 64-element feature vector.

    The image is converted to grayscale, shrunk to 8x8 pixels, and its pixel
    values are inverted and rescaled so that white becomes 0 and black
    becomes 16. The converted image is shown in a matplotlib window and the
    resulting vector is printed.

    Args:
        imagefile: Path (or file object) accepted by PIL.Image.open.

    Returns:
        A 1-D float numpy array of length 64.
    """
    # Open via a context manager so the underlying file is closed.
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    with Image.open(imagefile) as source:
        image = source.convert('L').resize((8, 8), Image.LANCZOS)
    img = np.asarray(image, dtype=float)
    # Matrix operation: map 0..255 (black..white) onto 16..0.
    img = np.floor(16 - 16 * (img / 256))
    #Display the converted image
    plt.imshow(img)
    plt.gray()
    plt.show()

    img = img.flatten()
    print("img=",img)
    return img

#Evaluate the model
def evaluate_model(clf):
    """Return clf's predictions for the module-level test samples."""
    return clf.predict(data_test)

#Create a report from forecasts
def show_report(predict, clf):
    """Print the classifier, its accuracy and a per-class report.

    Args:
        predict: Predicted labels, compared against label_test.
        clf: The classifier; printed as-is for reference.
    """
    # Compute both metrics before printing anything.
    accuracy = metrics.accuracy_score(label_test, predict)
    per_class = metrics.classification_report(label_test, predict)
    lines = (
        ('Sorter information =', clf),
        ('Correct answer rate =', accuracy),
        ('Report =', per_class),
    )
    for caption, value in lines:
        print(caption, value)
    # Report columns:
    #   precision: share of predictions for a label that were correct
    #   recall:    share of a label's true samples that were found
    #   f1-score:  harmonic mean of precision and recall
    #   support:   number of true samples of that label

def main():
    """Predict the digit in the image named on the command line.

    Reads the image path from sys.argv[1], converts it to feature data,
    obtains the classifier, prints the prediction and then prints an
    evaluation report. Prints a usage message and returns if no argument
    was given.
    """
    #Get command line arguments
    if len(sys.argv) <= 1:
        print("USAGE:")
        # The script is digits.py; the old message named a different file.
        print("python3 digits.py imagefile")
        return
    imagefile = sys.argv[1]
    data = image_to_data(imagefile)
    clf = select_model()
    predict_digits(data,clf)
    show_report(evaluate_model(clf),clf)

if __name__ == '__main__':
    main()

`result`


img= [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  9.  7.  7.  7.  7.  2.  0.  1.  8.
  0.  1.  0.  0.  0.  0.  1.  6.  0.  0.  0.  0.  0.  0.  1.  9.  5.  6.
  5.  1.  0.  0.  0.  4.  3.  3.  4.  8.  1.  0.  0.  0.  0.  0.  2.  9.
  2.  0.  0.  3.  8.  8.  8.  2.  0.  0.]
judgment result= [5]
Sorter information = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
Correct answer rate = 0.993333333333
Report =              precision    recall  f1-score   support

          0       1.00      1.00      1.00        38
          1       1.00      1.00      1.00        48
          2       1.00      1.00      1.00        40
          3       0.98      0.98      0.98        47
          4       1.00      1.00      1.00        54
          5       0.98      0.98      0.98        47
          6       0.98      1.00      0.99        46
          7       1.00      1.00      1.00        42
          8       1.00      1.00      1.00        47
          9       1.00      0.98      0.99        41

avg / total       0.99      0.99      0.99       450