import numpy as np
import pandas as pd
#from keras.utils import np_utils
# Load the MNIST dataset
from keras.datasets import mnist
(x_train_image,y_train_label),(x_test_image,y_test_label)=mnist.load_data()
print('train data= ',len(x_train_image))
print('test data=', len(x_test_image))
train data=  60000
test data= 10000
import matplotlib.pyplot as plt
# Helper function for plotting a grid of images with their labels (and optional predictions)
def plot_images_labels_prediction(images,labels,prediction,idx,num=10):
    # Set the figure size
    fig = plt.gcf()
    fig.set_size_inches(12,14)
    # Show at most 25 images
    if num>25: num=25
    # Plot the images one by one
    for i in range(0,num):
        # Create a subplot in a 5*5 grid (5 rows, 5 columns)
        ax = plt.subplot(5,5,i+1)
        # Draw the image
        ax.imshow(images[idx],cmap='binary')
        # Build the title from the label
        title = "label=" + str(labels[idx])
        # If predictions were passed in, show them too
        if len(prediction)>0:
            title += ",predict=" + str(prediction[idx])
        # Set the subplot title and font size
        ax.set_title(title,fontsize=10)
        # Hide the tick marks
        ax.set_xticks([]); ax.set_yticks([])
        idx += 1
    plt.show()
plot_images_labels_prediction(x_train_image,y_train_label,[],0,10)
# Image data --------------------------------------
# The training set has 60,000 images, each a 28*28 pixel picture
# There are also 60,000 labels
# Flatten each 2-D image matrix into a 1-D vector first
# 784 here comes from 28*28
x_Train = x_train_image.reshape(60000,784).astype('float32')
x_Test = x_test_image.reshape(10000,784).astype('float32')
# Pixel values go up to 255, so divide everything by 255 to scale them into 0-1
x_Train_normalize = x_Train/255
x_Test_normalize = x_Test/255
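# A quick sanity check (optional): after reshaping and scaling, the feature matrices
# should have shapes (60000, 784) and (10000, 784), with all values in the range [0, 1].
print(x_Train_normalize.shape, x_Test_normalize.shape)
print(x_Train_normalize.min(), x_Train_normalize.max())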
# Label data --------------------------------------
#y_TrainOneHot=np_utils.to_categorical(y_train_label)
#y_TestOneHot=np_utils.to_categorical(y_test_label)
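# np_utils is no longer exposed in recent Keras releases, which is why the import and the
# two lines above are commented out. A minimal one-hot encoding sketch using the current
# keras.utils API (assuming a recent Keras/TensorFlow; the scikit-learn models below take
# integer labels directly, so this step is not required for them):
from keras.utils import to_categorical
y_TrainOneHot = to_categorical(y_train_label, num_classes=10)
y_TestOneHot = to_categorical(y_test_label, num_classes=10)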
# Standard scientific Python imports
import matplotlib.pyplot as plt
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestCentroid, KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
model=NearestCentroid()
#model = GaussianNB()
#model = DecisionTreeClassifier()
#model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Nearest Centroid Method")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.82
model = GaussianNB()
#model = DecisionTreeClassifier()
#model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Gaussian Naive Bayes")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.556
model = DecisionTreeClassifier()
#model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Decision Tree Classifier")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.877
model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Logistic Regression")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Overall Accuracy:0.926
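# The ConvergenceWarning above means lbfgs stopped at its default iteration limit (100)
# before fully converging. A minimal sketch of the fix the warning suggests, assuming a
# larger iteration budget is acceptable (max_iter=1000 is illustrative, not tuned):
model = LogisticRegression(max_iter=1000)
model.fit(x_Train_normalize, y_train_label)
predicted = model.predict(x_Test_normalize)
print('Overall Accuracy:{:.3}'.format(metrics.accuracy_score(y_test_label, predicted)))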
model=KNeighborsClassifier(n_neighbors=10)
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of K-Nearest Neighbours Method")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.967
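# Overall accuracy alone hides which digits a model confuses with which. A minimal sketch
# of a per-class breakdown for the last fitted model (the KNeighborsClassifier here),
# using scikit-learn's classification_report:
print(metrics.classification_report(y_test_label, predicted))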