import numpy as np
import pandas as pd
#from keras.utils import np_utils
# Load the MNIST dataset
from keras.datasets import mnist
(x_train_image,y_train_label),(x_test_image,y_test_label)=mnist.load_data()
print('train data= ',len(x_train_image))
print('test data=', len(x_test_image))
train data=  60000
test data= 10000
import matplotlib.pyplot as plt
# Helper function for plotting a grid of images with their labels (and optional predictions)
def plot_images_labels_prediction(images,labels,prediction,idx,num=10):
    # Set the figure size
    fig = plt.gcf()
    fig.set_size_inches(12,14)
    # Show at most 25 images
    if num>25: num=25
    # Plot the images one by one
    for i in range(0,num):
        # Create a subplot in a 5*5 grid (5 rows, 5 columns)
        ax = plt.subplot(5,5,i+1)
        # Draw the image
        ax.imshow(images[idx],cmap='binary')
        # Build the title from the label
        title = "label=" + str(labels[idx])
        # If predictions were passed in, show them too
        if len(prediction)>0:
            title += ",predict=" + str(prediction[idx])
        # Set the subplot title and font size
        ax.set_title(title,fontsize=10)
        # Hide the tick marks
        ax.set_xticks([]); ax.set_yticks([])
        idx += 1
    plt.show()
plot_images_labels_prediction(x_train_image,y_train_label,[],0,10)
# Image data --------------------------------------
# The training set has 60,000 images, each a 28*28 pixel picture
# There are also 60,000 labels
# Flatten each 2-D image matrix into a 1-D vector first
# 784 here comes from 28*28
x_Train = x_train_image.reshape(60000,784).astype('float32')
x_Test = x_test_image.reshape(10000,784).astype('float32')
# Pixel values go up to 255, so divide everything by 255 to scale them into 0-1
x_Train_normalize = x_Train/255
x_Test_normalize = x_Test/255
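# A quick sanity check (optional): after reshaping and scaling, the feature matrices
# should have shapes (60000, 784) and (10000, 784), with all values in the range [0, 1].
print(x_Train_normalize.shape, x_Test_normalize.shape)
print(x_Train_normalize.min(), x_Train_normalize.max())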
# Label data --------------------------------------
#y_TrainOneHot=np_utils.to_categorical(y_train_label)
#y_TestOneHot=np_utils.to_categorical(y_test_label)
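# np_utils is no longer exposed in recent Keras releases, which is why the import and the
# two lines above are commented out. A minimal one-hot encoding sketch using the current
# keras.utils API (assuming a recent Keras/TensorFlow; the scikit-learn models below take
# integer labels directly, so this step is not required for them):
from keras.utils import to_categorical
y_TrainOneHot = to_categorical(y_train_label, num_classes=10)
y_TestOneHot = to_categorical(y_test_label, num_classes=10)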
# Standard scientific Python imports
import matplotlib.pyplot as plt
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestCentroid, KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
model=NearestCentroid()
#model = GaussianNB()
#model = DecisionTreeClassifier()
#model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Nearest Centroid Method")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.82
model = GaussianNB()
#model = DecisionTreeClassifier()
#model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Gaussian Naive Bayes")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.556
model = DecisionTreeClassifier()
#model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Decision Tree Classifier")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.877
model = LogisticRegression()
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of Logistic Regression")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Overall Accuracy:0.926
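# The ConvergenceWarning above means lbfgs stopped at its default iteration limit (100)
# before fully converging. A minimal sketch of the fix the warning suggests, assuming a
# larger iteration budget is acceptable (max_iter=1000 is illustrative, not tuned):
model = LogisticRegression(max_iter=1000)
model.fit(x_Train_normalize, y_train_label)
predicted = model.predict(x_Test_normalize)
print('Overall Accuracy:{:.3}'.format(metrics.accuracy_score(y_test_label, predicted)))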
model=KNeighborsClassifier(n_neighbors=10)
# fit the model
model.fit(x_Train_normalize, y_train_label)
# Predict the value of the digit on the test subset
predicted = model.predict(x_Test_normalize)
# Display the confusion matrix
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test_label, predicted,cmap='CMRmap_r')
accu = metrics.accuracy_score(y_test_label, predicted)
disp.figure_.suptitle("Confusion Matrix of K-Nearest Neighbours Method")
#print(f"Confusion matrix:\n{disp.confusion_matrix}")
print('Overall Accuracy:{:.3}'.format(accu))
Overall Accuracy:0.967
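# Overall accuracy alone hides which digits a model confuses with which. A minimal sketch
# of a per-class breakdown for the last fitted model (the KNeighborsClassifier here),
# using scikit-learn's classification_report:
print(metrics.classification_report(y_test_label, predicted))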