【PyTorch】多项式回归

1. 模型与代码实现

1.1. 模型

  • 将多项式特征值预处理为线性模型的特征值。即

    $y = w_0 + w_1 x + w_2 x^2 + \dots + w_n x^n$ 变换为

    $y = w_0 + w_1 z_1 + w_2 z_2 + \dots + w_n z_n$,其中 $z_i = x^i$

  • 为了避免指数值过大,可以将

    $x^i$ 调整为 $\frac{x^i}{i!}$,即

    $y = w_0 + w_1\frac{x}{1!} + w_2\frac{x^2}{2!} + \dots + w_n\frac{x^n}{n!}$

1.2. 代码实现

1.2.1. 完整代码

import os
import numpy as np
import math, torch
from d2l import torch as d2l
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from tensorboardX import SummaryWriter
from rich.progress import track

def evaluate_loss(dataloader):
    """Return the model's average per-element loss over ``dataloader``.

    Relies on the module-level ``net``, ``criterion``, ``device`` and
    ``metric`` objects that the script section of this file creates.
    ``metric`` is reset first, so any totals it held before the call are
    discarded.

    Args:
        dataloader: Iterable yielding ``(X, y)`` tensor batches.

    Returns:
        The accumulated loss sum divided by the accumulated element count.
    """
    metric.reset()
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device, non_blocking=True)
            batch_y = batch_y.to(device, non_blocking=True)
            batch_loss = criterion(net(batch_x), batch_y)
            # Slot 0 accumulates the loss total, slot 1 the element count.
            metric.add(batch_loss.sum(), batch_loss.numel())
    loss_total, n_elements = metric[0], metric[1]
    return loss_total / n_elements

def load_dataset(data_arrays):
    """Wrap tensors in a shuffling ``DataLoader``.

    Reads the module-level ``batch_size``, ``num_workers`` and
    ``prefetch_factor`` settings.

    Args:
        data_arrays: Sequence of tensors sharing the same first dimension,
            e.g. ``(features, labels)``.

    Returns:
        A ``DataLoader`` over ``TensorDataset(*data_arrays)`` with shuffling
        and pinned memory enabled.
    """
    dataset = TensorDataset(*data_arrays)
    # prefetch_factor only applies to worker processes. Recent PyTorch
    # releases raise ValueError when it is given together with num_workers=0,
    # so forward it only when multiprocessing loading is actually enabled.
    worker_kwargs = {}
    if num_workers > 0:
        worker_kwargs["prefetch_factor"] = prefetch_factor
    return DataLoader(dataset, batch_size, shuffle=True, pin_memory=True,
        num_workers=num_workers, **worker_kwargs)


if __name__ == '__main__':
    # Global settings. evaluate_loss() and load_dataset() read several of
    # these names (device, batch_size, num_workers, prefetch_factor) as module
    # globals, so they are deliberately assigned at module scope.
    learning_rate = 0.01
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_epochs = 400
    batch_size = 10
    num_workers = 0
    prefetch_factor = 2

    max_degree = 20             # highest polynomial degree of the generated features
    model_degree = 1            # degree of the polynomial the model fits
    n_train, n_test = 100, 100  # training / test set sizes

    # Ground-truth coefficients: only degrees 0..3 are non-zero, so the labels
    # follow a cubic polynomial.
    true_w = np.zeros(max_degree+1)
    true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])

    # Create the logger
    def get_logdir():
        """Return an unused ``runs/exp<N>`` path, creating ``runs`` if needed."""
        root = 'runs'
        os.makedirs(root, exist_ok=True)
        order = len(os.listdir(root)) + 1
        # Counting entries alone can point at an existing directory once an
        # earlier run has been deleted; advance until the name is free.
        while os.path.exists(os.path.join(root, f'exp{order}')):
            order += 1
        return f'runs/exp{order}'
    writer = SummaryWriter(get_logdir())

    # Everything after the writer is opened runs under try/finally so the
    # event file is flushed and closed even if training fails midway.
    try:
        # Generate the dataset
        features = np.random.normal(size=(n_train + n_test, 1))
        np.random.shuffle(features)
        # Column i holds x**i ...
        poly_features = np.power(features, np.arange(max_degree+1).reshape(1, -1))
        # ... rescaled to x**i / i! to keep high powers small; gamma(i + 1) == i!
        factorials = np.array([math.gamma(i + 1) for i in range(max_degree+1)])
        poly_features /= factorials
        labels = np.dot(poly_features, true_w)
        labels += np.random.normal(scale=0.1, size=labels.shape)    # Gaussian noise ~ N(0, 0.01)

        poly_features, labels = [
            torch.as_tensor(x, dtype=torch.float32) for x in [
                poly_features, labels]]

        # Build the model. No bias term: feature column 0 is the constant
        # x**0 / 0! = 1, so its weight plays the role of the intercept.
        net = nn.Sequential(nn.Linear(model_degree+1, 1, bias=False)).to(device, non_blocking=True)
        def init_weights(m):
            """Initialise linear-layer weights from N(0, 0.01**2)."""
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
        net.apply(init_weights)
        # reduction='none' keeps per-element losses so that evaluate_loss()
        # can accumulate their sum and count itself.
        criterion = nn.MSELoss(reduction='none')
        optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

        # Load the dataset: the model only sees the first model_degree+1 features.
        features_train, labels_train = poly_features[:n_train, :model_degree+1], labels[:n_train].reshape(-1, 1)
        features_test, labels_test = poly_features[n_train:, :model_degree+1], labels[n_train:].reshape(-1, 1)
        dataloader_train = load_dataset((features_train, labels_train))
        dataloader_test = load_dataset((features_test, labels_test))

        # Training loop
        metric = d2l.Accumulator(2)  # running loss sum, number of elements
        for epoch in track(range(num_epochs)):
            for X, y in dataloader_train:
                X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
                loss = criterion(net(X), y)
                optimizer.zero_grad()
                loss.mean().backward()
                optimizer.step()

            # Log train and test loss once per epoch under a shared tag.
            writer.add_scalars(f"{model_degree}-degree", {
                "train_loss": evaluate_loss(dataloader_train),
                "test_loss": evaluate_loss(dataloader_test),
            }, epoch)
        print("weights =", net[0].weight.data.cpu().numpy())
    finally:
        writer.close()

1.2.2. 输出结果

  • 采用1阶多项式(线性模型)拟合:
    1degree

  • 采用3阶多项式拟合
    3degree

  • 采用20阶多项式拟合
    20degree

2. Q&A

2.1. 欠拟合与过拟合

数据集是按照3阶多项式生成的。使用1阶多项式去拟合,发现最后损失始终降不下去,这种情况称为欠拟合,说明模型复杂度不够;使用20阶多项式去拟合,发现测试损失最后还增长了,训练和测试损失总体也比3阶多项式模型的值高,这种情况称为过拟合,说明模型太复杂了,训练过程受到了噪声的影响。

本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
分享
二维码
< <上一篇
下一篇>>