# 1.介绍

• 这篇文章适合刚接触 Kaggle、想尽快熟悉平台并独立完成一个竞赛项目的同学。本文以湖南农业大学数据专业的 Kaggle 竞赛为例，讲解一场竞赛的完整流程，并对代码做简要介绍。
• https://www.kaggle.com/competitions/classifying-the-fashion-mnist/submit
• 导库如下：
``````
import paddle
import paddle.vision.transforms as T
import pandas as pd
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import paddle.nn.functional as F
``````

# 3.数据集封装

``````
from paddle.io import Dataset
class myDataset(Dataset):
    """Dataset over a 2-D table whose column 0 is the label and the rest are pixels.

    In 'train' and 'val' modes the rows are shuffled with a fixed seed and
    split: the first ``val_split`` fraction becomes the validation set, the
    remainder the training set. In 'test' mode every row is kept in order.
    """

    def __init__(self, data=None, mode='train', transform=None, val_split=0.2):
        """Build the requested split.

        Args:
            data: 2-D table (anything ``numpy.array`` turns into a 2-D array).
            mode: One of 'train', 'val' or 'test'.
            transform: Accepted for interface compatibility; currently unused.
            val_split: Fraction of rows assigned to the validation split.

        Raises:
            ValueError: If ``data`` is missing or ``mode`` is not recognised.
        """
        if data is None:
            raise ValueError(
                "myDataset requires `data`: a 2-D table with the label in "
                "column 0 and the pixel values in the remaining columns"
            )
        if mode not in ('train', 'val', 'test'):
            # A misspelled mode used to skip the split silently; fail loudly.
            raise ValueError(
                f"mode must be 'train', 'val' or 'test', got {mode!r}"
            )

        if mode in ('train', 'val'):
            # Fixed seed: separately built 'train' and 'val' instances get the
            # same permutation, so their rows never overlap. Note that this
            # reseeds NumPy's global random generator.
            np.random.seed(43)
            data = array(data)  # array() copies, so the caller's table is untouched
            np.random.shuffle(data)
            val_set_size = int(len(data) * val_split)
            if mode == 'val':
                data = data[:val_set_size, :]
            else:
                data = data[val_set_size:, :]
        else:
            data = array(data)

        # Column 0 is stored as the label in every mode.
        # NOTE(review): for the test table this column may be an id rather
        # than a class label — confirm against the competition files.
        self.label = data[:, 0]
        self.data = data[:, 1:]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The row is reshaped to (1, 28, 28), rescaled with
        ``(x - 127.5) / 127.5`` (0 maps to -1, 255 maps to 1) and returned as
        float32; the label is returned as a Python int.
        """
        image = self.data[idx].reshape(1, 28, 28)
        image = np.array((image - 127.5) / 127.5, dtype='float32')
        return image, int(self.label[idx])

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.label)
``````
``````train_dataset=myDataset()#读取训练集
val_dataset=myDataset(mode='val')#验证集
test_dataset=myDataset(mode='test')#测试集
``````

# 4.模型组建

``````class LeNet(paddle.nn.Layer):
def __init__(self):#初始化我们所需要用到的网络层结构
super(LeNet, self).__init__()

self.conv1 = paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
self.max_pool1 = paddle.nn.MaxPool2D(kernel_size=2,  stride=2)
self.conv2 = paddle.nn.Conv2D(in_channels=6, out_channels=16, kernel_size=5, stride=1)
self.max_pool2 = paddle.nn.MaxPool2D(kernel_size=2, stride=2)
self.linear1 = paddle.nn.Linear(in_features=16*5*5, out_features=120)
self.linear2 = paddle.nn.Linear(in_features=120, out_features=84)
self.linear3 = paddle.nn.Linear(in_features=84, out_features=10)

def forward(self, inputs):#将网络按照顺序‘联’起来
y = self.conv1(inputs)
y = F.relu(y)
y = self.max_pool1(y)
y = F.relu(y)
y = self.conv2(y)
y = self.max_pool2(y)
y = paddle.flatten(y, start_axis=1,stop_axis=-1)
y = self.linear1(y)
y = F.relu(y)
y = self.linear2(y)
y = F.relu(y)
y = self.linear3(y)

return y
``````

``````model = paddle.Model(LeNet())
model.summary((-1,1,28, 28))
``````

## 4.2模型编译（优化器与损失函数）

``````model.prepare(paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()),
``````

# 5.模型训练

``````model.fit(train_data=train_dataset, epochs=50, batch_size=30, verbose=1,eval_data=val_dataset)
``````

• epochs, batch_size,learning_rate等超参数大家都可以自行设定，试一试能不能使模型更好。

# 6.最后–结果的输出

``````pred_result=model.predict(test_dataset)
#将我们的测试集用训练好的模型预测，结果存在pred_result
``````
``````re=array(pred_result[0])[:,0,:].argmax(1)
df=pd.DataFrame(re)
df.to_csv('result.csv')
``````

THE END