Machine Learning Project: IRIS Dataset, ANN, PyTorch


Preface

Using the IRIS dataset and PyTorch to practice building a simple ANN.
(In other words: it's the holidays, so I'm tidying up my assignments and reviewing the course.)


Task
Learn to build an ANN model with PyTorch
Dataset
IRIS

1. Import packages and data

from sklearn import datasets
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
url = "https://raw.githubusercontent.com/callxpert/datasets/master/iris.data.txt" 
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class'] 
dataset = pd.read_csv(url, names=names)
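
As a side note, the `datasets` import above is not actually used: the same data also ships with scikit-learn, so the CSV download can be skipped. A minimal sketch (the `df` name is only for this illustration; the rest of the post keeps using `dataset` loaded from the CSV, and note that scikit-learn's class names are 'setosa' etc. rather than 'Iris-setosa'):

# Alternative: load the bundled copy of IRIS from scikit-learn instead of the CSV.
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=names[:4])
df['class'] = iris.target_names[iris.target]   # map integer targets back to class names
print(df.head())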

2. Exploratory Data Analysis (EDA)

# Shape of the data
print(dataset.shape)
# Statistical summary of the data:
# count, mean, min and max values, and some percentiles.
print(dataset.describe())
# Column types and non-null counts
dataset.info()


#class distribution
print(dataset.groupby('class').size())
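
One extra check that often belongs in EDA (not part of the original post): confirm there are no missing values before plotting.

# Count missing values per column; IRIS should have none.
print(dataset.isnull().sum())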


# box and whisker plots
dataset.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()


ax = sns.boxplot(x="class", y="sepal-length", data=dataset)
ax = sns.stripplot(x="class", y="sepal-length", data=dataset, jitter=True, edgecolor="gray")


# histograms
dataset.hist()
plt.show()


# Scatter plot matrix: shows how each feature relates to the others.
from pandas.plotting import scatter_matrix
scatter_matrix(dataset)
plt.show()


sns.pairplot(dataset, hue="class", height=3)


#Scatter plot of sepal-length and sepal-width features
sns.FacetGrid(dataset, hue="class", height=5) \
   .map(plt.scatter, "sepal-length", "sepal-width") \
   .add_legend()


#Scatter plot of petal-length and petal-width features
sns.FacetGrid(dataset, hue="class", height=5) \
   .map(plt.scatter, "petal-length", "petal-width") \
   .add_legend()


3. Split the dataset

from sklearn.model_selection import train_test_split
array = dataset.values
X = array[:,0:4]
Y = array[:,4]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=7)
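
The features are used unscaled in what follows. Both KNN and the ANN are sensitive to feature scale, so standardization is a common optional extra step; a minimal sketch, fitting the scaler on the training split only so nothing leaks from the test set (skipping it keeps the results identical to the original pipeline):

from sklearn.preprocessing import StandardScaler
# Optional: standardize features (zero mean, unit variance) using training statistics only.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train.astype(float))
x_test = scaler.transform(x_test.astype(float))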

4. KNN

KNN vs. k-means (to be filled in later)

# Build the model
from sklearn import neighbors
classifier = neighbors.KNeighborsClassifier()
# Train the model
classifier.fit(x_train, y_train)
# Make predictions
predictions = classifier.predict(x_test)
# Accuracy
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_test, predictions)
print(acc)
# Classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions))
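
KNeighborsClassifier is used with its default n_neighbors=5 here. If you want to tune k, a small cross-validated grid search is a natural follow-up; a sketch, assuming 5-fold CV on the training split:

from sklearn.model_selection import GridSearchCV
# Search k = 1..15 with 5-fold cross-validation on the training split.
param_grid = {'n_neighbors': list(range(1, 16))}
grid = GridSearchCV(neighbors.KNeighborsClassifier(), param_grid, cv=5)
grid.fit(x_train, y_train)
print(grid.best_params_, grid.best_score_)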

5. ANN

# We build the model with PyTorch.
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        n_feature = 4   # 4 input features
        n_hidden = 30   # hidden layer width
        n_output = 3    # 3 iris classes
        self.model = nn.Sequential(
            nn.Linear(n_feature, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_output))

    def forward(self, x):
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax internally,
        # so adding an extra softmax here would squash the gradients.
        return self.model(x)

model = Model()
print(model)
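
print(model) only lists the layers. As a quick sanity check on the 4-30-30-3 layout, the trainable parameters can be counted directly:

# Count trainable parameters: (4*30+30) + (30*30+30) + (30*3+3) = 1173.
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(n_params)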

Set the optimizer and loss function

import torch
optimizer = torch.optim.SGD(model.parameters(), lr=0.001) 
loss_func = nn.CrossEntropyLoss() 
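
With SGD at lr=0.001 the loss decreases slowly, which is why the loop below runs for 50000 epochs. A common alternative (not used in the rest of this post) is Adam, which tends to converge in far fewer epochs; if you swap it in, the epoch count can be reduced accordingly:

# Optional alternative optimizer; typically converges much faster than plain SGD here.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)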

Training

# Encode the string class labels as integers 0-2.
# Fit the encoder once and reuse it so train and test share the same mapping.
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder().fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

# Convert the NumPy splits to tensors once, outside the training loop.
x_train_t = torch.tensor(x_train.astype(float)).float()
y_train_t = torch.tensor(y_train).long()

for epoch in range(50000):
    model.train()
    # Forward pass
    out = model(x_train_t)
    loss = loss_func(out, y_train_t)
    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 2000 == 0:
        print(epoch, loss.item())
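
The loop only prints the loss. To also check accuracy on the training split once training has finished, a short sketch reusing the tensors built above:

# Accuracy on the training split after the final epoch.
model.eval()
with torch.no_grad():
    train_out = model(x_train_t)
    _, train_pred = torch.max(train_out, 1)
    train_acc = (train_pred == y_train_t).float().mean().item()
print('Train Acc: {:.4f}'.format(train_acc))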

Results

# Evaluate on the test split
model.eval()
eval_loss = 0
eval_acc = 0
with torch.no_grad():
    x_test_t = torch.tensor(x_test.astype(float)).float()
    label = torch.tensor(y_test).long()
    out = model(x_test_t)
    loss = loss_func(out, label)
eval_loss += loss.item() * label.size(0)
_, pred = torch.max(out, 1)            # take the highest-scoring class as the prediction
num_correct = (pred == label).sum()    # number of correctly predicted labels
eval_acc += num_correct.item()         # used for the accuracy below
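
For a per-class breakdown like the one printed for KNN, the same classification_report works on the ANN predictions as well (the labels here are the encoded integers 0-2 rather than the class names):

# Per-class precision/recall/F1 for the ANN, mirroring the KNN report.
from sklearn.metrics import classification_report
print(classification_report(label.numpy(), pred.numpy()))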

Test loss and accuracy

test_loss = eval_loss / len(y_test)
acc = eval_acc / len(y_test)
print('Test Loss: {:.6f}, Acc: {:.6f}'.format(test_loss, acc))
