Python textbook to acquire practical skills
Pass an image of a handwritten digit on the command line, e.g. `python3 digits.py ${fileName}`, and the script will predict which digit it shows.
digits.py
import os, sys, math
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, model_selection, svm, metrics
from sklearn.externals import joblib
from PIL import Image
# File name under which the trained classifier is persisted between runs.
DIGITS_PKL = "digit-clf.pkl"

# Load scikit-learn's bundled handwritten-digit dataset.
digits = datasets.load_digits()

# Hold-out split into training and test subsets.
# The seed is fixed on purpose: the trained model is cached in DIGITS_PKL and
# reused by later runs. With an unseeded split, a later run would draw a
# different test set that overlaps the samples the cached model was trained
# on, which inflates the reported accuracy.
data_train, data_test, label_train, label_test = \
    model_selection.train_test_split(digits.data, digits.target, random_state=0)
def create_model():
    """Train a digit classifier, save it to DIGITS_PKL, and return it.

    The classifier is an RBF-kernel ``svm.SVC`` with ``gamma=0.001`` fitted on
    the module-level ``data_train`` / ``label_train`` split.

    NOTE(review): this relies on the module-level ``joblib`` name, which this
    file imports from ``sklearn.externals``; that import path is no longer
    available in current scikit-learn releases (the standalone ``joblib``
    package is the replacement).

    Returns:
        The fitted classifier.
    """
    # The original author also experimented with svm.LinearSVC() and
    # sklearn.ensemble.RandomForestClassifier() in place of SVC.
    model = svm.SVC(gamma=0.001)
    model.fit(data_train, label_train)

    # Persist the fitted model so later runs can skip training.
    joblib.dump(model, DIGITS_PKL)
    print("Saved the prediction model=", DIGITS_PKL)
    return model
def select_model():
    """Return the digit classifier, training it first if no saved model exists.

    If DIGITS_PKL is missing, ``create_model()`` trains a classifier, writes it
    to DIGITS_PKL, and that freshly trained object is returned directly (the
    original discarded it and immediately re-read the file it had just
    written). Otherwise the saved model is loaded from disk.

    Returns:
        A fitted classifier.
    """
    if not os.path.exists(DIGITS_PKL):
        return create_model()

    # SECURITY NOTE: joblib.load unpickles the file, which can execute
    # arbitrary code. Only load a DIGITS_PKL that this script produced.
    return joblib.load(DIGITS_PKL)
def predict_digits(data, clf):
    """Classify one flattened sample with ``clf`` and print the prediction.

    Args:
        data: A single feature vector; it is wrapped in a list so that
            ``clf.predict`` receives a batch containing one sample.
        clf: Any object exposing ``predict(batch)``.
    """
    print("judgment result=", clf.predict([data]))
def image_to_data(imagefile):
    """Convert an image file into a flat 64-element array of digit features.

    The image is converted to grayscale, shrunk to 8x8 pixels, and its
    intensities are inverted and rescaled from 0..255 to 0..16, so white
    becomes 0 and black becomes 16. The 8x8 result is displayed with
    matplotlib and printed before being returned.

    Args:
        imagefile: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A 1-D float ``numpy`` array of length 64.
    """
    # Open inside a context manager so the underlying file handle is closed;
    # convert() returns an independent in-memory image.
    with Image.open(imagefile) as source:
        gray = source.convert('L')  # Grayscale conversion

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    small = gray.resize((8, 8), Image.LANCZOS)

    img = np.asarray(small, dtype=float)
    # Vectorized rescale: invert and map 0..255 onto the 0..16 range.
    img = np.floor(16 - 16 * (img / 256))

    # Display the converted image.
    plt.imshow(img)
    plt.gray()
    plt.show()

    img = img.flatten()
    print("img=", img)
    return img
def evaluate_model(clf):
    """Return ``clf``'s predictions for the module-level ``data_test`` set."""
    return clf.predict(data_test)
def show_report(predict, clf):
    """Print the classifier description, accuracy, and per-class report.

    Predictions are scored against the module-level ``label_test`` labels.

    Columns of the classification report:
        precision: fraction of samples predicted as a class that truly are it
        recall:    fraction of a class's true samples that were found
        f1-score:  harmonic mean of precision and recall
        support:   number of true samples of that class

    Args:
        predict: Predicted labels, aligned with ``label_test``.
        clf: The classifier that produced them (printed for reference only).
    """
    accuracy = metrics.accuracy_score(label_test, predict)
    per_class = metrics.classification_report(label_test, predict)

    lines = (
        ('Sorter information =', clf),
        ('Correct answer rate =', accuracy),
        ('Report =', per_class),
    )
    for caption, value in lines:
        print(caption, value)
def main():
    """Command-line entry point: classify one image, then report test accuracy.

    Expects the image path as the first command-line argument. Without it, a
    usage message is printed and the function returns without doing any work.
    """
    # Get command line arguments.
    if len(sys.argv) <= 1:
        # Derive the script name from argv so the usage text matches however
        # the file is actually named (the old text hard-coded a different name).
        script = os.path.basename(sys.argv[0]) if sys.argv else ""
        print("USAGE:")
        print("python3 {} imagefile".format(script or "digits.py"))
        return

    imagefile = sys.argv[1]
    data = image_to_data(imagefile)
    clf = select_model()
    predict_digits(data, clf)
    show_report(evaluate_model(clf), clf)


if __name__ == '__main__':
    main()
result
img= [ 0. 0. 0. 0. 0. 0. 0. 0. 1. 9. 7. 7. 7. 7. 2. 0. 1. 8.
0. 1. 0. 0. 0. 0. 1. 6. 0. 0. 0. 0. 0. 0. 1. 9. 5. 6.
5. 1. 0. 0. 0. 4. 3. 3. 4. 8. 1. 0. 0. 0. 0. 0. 2. 9.
2. 0. 0. 3. 8. 8. 8. 2. 0. 0.]
judgment result= [5]
Sorter information = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
Correct answer rate = 0.993333333333
Report = precision recall f1-score support
0 1.00 1.00 1.00 38
1 1.00 1.00 1.00 48
2 1.00 1.00 1.00 40
3 0.98 0.98 0.98 47
4 1.00 1.00 1.00 54
5 0.98 0.98 0.98 47
6 0.98 1.00 0.99 46
7 1.00 1.00 1.00 42
8 1.00 1.00 1.00 47
9 1.00 0.98 0.99 41
avg / total 0.99 0.99 0.99 450
Recommended Posts