Machine Learning Project: IRIS Dataset, ANN, PyTorch


Preface

Using the IRIS dataset and PyTorch to practice building a simple ANN.
(In other words: it's the holidays, so I'm tidying up my assignments and reviewing the course.)


Task
Learn to build an ANN model with PyTorch
Dataset
IRIS

1. Import packages and data

from sklearn import datasets
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
url = "https://raw.githubusercontent.com/callxpert/datasets/master/iris.data.txt" 
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class'] 
dataset = pd.read_csv(url, names=names)
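
As a side note, the `datasets` import above is not actually used: the same data also ships with scikit-learn, so the CSV download can be skipped. A minimal sketch (the `df` name is only for this illustration; the rest of the post keeps using `dataset` loaded from the CSV, and note that scikit-learn's class names are 'setosa' etc. rather than 'Iris-setosa'):

# Alternative: load the bundled copy of IRIS from scikit-learn instead of the CSV.
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=names[:4])
df['class'] = iris.target_names[iris.target]   # map integer targets back to class names
print(df.head())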

2. Exploratory Data Analysis (EDA)

# Shape of the data
print(dataset.shape)
# Statistical summary of the data:
# count, mean, min and max values, and some percentiles.
print(dataset.describe())
# Column types and non-null counts
dataset.info()


#class distribution
print(dataset.groupby('class').size())
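
One extra check that often belongs in EDA (not part of the original post): confirm there are no missing values before plotting.

# Count missing values per column; IRIS should have none.
print(dataset.isnull().sum())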


# box and whisker plots
dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()


ax = sns.boxplot(x="class", y="sepal-length", data=dataset)
ax = sns.stripplot(x="class", y="sepal-length", data=dataset, jitter=True, edgecolor="gray")


# histograms
dataset.hist()
plt.show()


# Scatter plot matrix: shows how each feature relates to the others.
from pandas.plotting import scatter_matrix
scatter_matrix(dataset)
plt.show()


sns.pairplot(dataset, hue="class", height=3)


#Scatter plot of sepal-length and sepal-width features
sns.FacetGrid(dataset, hue="class", height=5) \
   .map(plt.scatter, "sepal-length", "sepal-width") \
   .add_legend()


#Scatter plot of petal-length and petal-width features
sns.FacetGrid(dataset, hue="class", height=5) \
   .map(plt.scatter, "petal-length", "petal-width") \
   .add_legend()


3. Split the dataset

from sklearn.model_selection import train_test_split
array = dataset.values
X = array[:,0:4]
Y = array[:,4]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=7)
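
The features are used unscaled in what follows. Both KNN and the ANN are sensitive to feature scale, so standardization is a common optional extra step; a minimal sketch, fitting the scaler on the training split only so nothing leaks from the test set (skipping it keeps the results identical to the original pipeline):

from sklearn.preprocessing import StandardScaler
# Optional: standardize features (zero mean, unit variance) using training statistics only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train.astype(float))
x_test = scaler.transform(x_test.astype(float))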

4. KNN

KNN vs. k-means (to be filled in later)

# Build the model
from sklearn import neighbors
classifier = neighbors.KNeighborsClassifier()
# Train the model
classifier.fit(x_train, y_train)
# Make predictions
predictions = classifier.predict(x_test)
# Accuracy
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, predictions)
print(acc)
# Classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))
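
KNeighborsClassifier is used with its default n_neighbors=5 here. If you want to tune k, a small cross-validated grid search is a natural follow-up; a sketch, assuming 5-fold CV on the training split:

from sklearn.model_selection import GridSearchCV
# Search k = 1..15 with 5-fold cross-validation on the training split.
param_grid = {'n_neighbors': list(range(1, 16))}
grid = GridSearchCV(neighbors.KNeighborsClassifier(), param_grid, cv=5)
grid.fit(x_train, y_train)
print(grid.best_params_, grid.best_score_)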

5. ANN

# We build the model with PyTorch.
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        n_feature = 4   # 4 input features
        n_hidden = 30   # hidden layer width
        n_output = 3    # 3 iris classes
        self.model = nn.Sequential(
            nn.Linear(n_feature, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_output))

    def forward(self, x):
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax internally,
        # so adding an extra softmax here would squash the gradients.
        return self.model(x)

model = Model()
print(model)
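
print(model) only lists the layers. As a quick sanity check on the 4-30-30-3 layout, the trainable parameters can be counted directly:

# Count trainable parameters: (4*30+30) + (30*30+30) + (30*3+3) = 1173.
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(n_params)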

Set the optimizer and loss function

import torch
optimizer = torch.optim.SGD(model.parameters(), lr=0.001) 
loss_func = nn.CrossEntropyLoss() 
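
With SGD at lr=0.001 the loss decreases slowly, which is why the loop below runs for 50000 epochs. A common alternative (not used in the rest of this post) is Adam, which tends to converge in far fewer epochs; if you swap it in, the epoch count can be reduced accordingly:

# Optional alternative optimizer; typically converges much faster than plain SGD here.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)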

Training

# Encode the string class labels as integers 0-2.
# Fit the encoder once and reuse it so train and test share the same mapping.
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder().fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

# Convert the NumPy splits to tensors once, outside the training loop.
x_train_t = torch.tensor(x_train.astype(float)).float()
y_train_t = torch.tensor(y_train).long()

for epoch in range(50000):
    model.train()
    # Forward pass
    out = model(x_train_t)
    loss = loss_func(out, y_train_t)
    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 2000 == 0:
        print(epoch, loss.item())
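
The loop only prints the loss. To also check accuracy on the training split once training has finished, a short sketch reusing the tensors built above:

# Accuracy on the training split after the final epoch.
model.eval()
with torch.no_grad():
    train_out = model(x_train_t)
    _, train_pred = torch.max(train_out, 1)
    train_acc = (train_pred == y_train_t).float().mean().item()
print('Train Acc: {:.4f}'.format(train_acc))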

Results

# Evaluate on the test split
model.eval()
eval_loss = 0
eval_acc = 0
with torch.no_grad():
    x_test_t = torch.tensor(x_test.astype(float)).float()
    label = torch.tensor(y_test).long()
    out = model(x_test_t)
    loss = loss_func(out, label)
eval_loss += loss.item() * label.size(0)
_, pred = torch.max(out, 1)            # take the highest-scoring class as the prediction
num_correct = (pred == label).sum()    # number of correctly predicted labels
eval_acc += num_correct.item()         # used for the accuracy below
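
For a per-class breakdown like the one printed for KNN, the same classification_report works on the ANN predictions as well (the labels here are the encoded integers 0-2 rather than the class names):

# Per-class precision/recall/F1 for the ANN, mirroring the KNN report.
from sklearn.metrics import classification_report
print(classification_report(label.numpy(), pred.numpy()))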

Test loss and accuracy

test_loss = eval_loss / len(y_test)
acc = eval_acc / len(y_test)
print('Test Loss: {:.6f}, Acc: {:.6f}'.format(test_loss, acc))
