自学内容网 自学内容网

第P2周:Pytorch实现CIFAR10彩色图片识别

目标

  1. 实现CIFAR-10的彩色图片识别
  2. 实现比P1周更复杂一点的CNN网络

具体实现

(一)环境

语言环境:Python 3.10
编 译 器: PyCharm
框 架: Pytorch 2.5.1

(二)具体步骤
1.
import torch  
import torch.nn as nn  
import matplotlib.pyplot as plt  
import torchvision  
  
# Step 1: choose the compute device.
def USE_GPU():
    """Pick the torch device to run on and announce the choice.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` reports a
        usable GPU, otherwise ``cpu``. A one-line message describing the
        choice is printed as a side effect.
    """
    has_cuda = torch.cuda.is_available()
    message = 'CUDA is available, will use GPU' if has_cuda else 'CUDA is not available. Will use CPU'
    print(message)
    return torch.device("cuda" if has_cuda else "cpu")


# Module-level device shared by the rest of the script.
device = USE_GPU()

输出:CUDA is available, will use GPU

  
# Step 2: load the data. CIFAR-10 is bundled with torchvision and is
# downloaded into ./data automatically when it is not already there.
_cifar_options = {'root': './data', 'download': True}
train_dataset = torchvision.datasets.CIFAR10(
    train=True,
    transform=torchvision.transforms.ToTensor(),
    **_cifar_options,
)
test_dataset = torchvision.datasets.CIFAR10(
    train=False,
    transform=torchvision.transforms.ToTensor(),
    **_cifar_options,
)

batch_size = 32
# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataload = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)
test_dataload = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
)
  
# Fetch one batch to inspect the data layout.
# The batch shape is [batch_size, channel, height, width]: batch_size is the
# user-chosen loader setting; channel, height and width describe each image.
imgs, labels = next(iter(train_dataload))
print(imgs.shape)

# Preview the first 20 images of the batch.
import numpy as np
plt.figure(figsize=(20, 5))
for i, images in enumerate(imgs[:20]):
    # Convert this single image from (C, H, W) to (H, W, C), the layout
    # imshow expects. The per-image tensor `images` must be used here: the
    # whole 4-D batch `imgs` cannot be permuted with a 3-axis transpose.
    npimg = images.numpy().transpose((1, 2, 0))
    # Lay the figure out as 2 rows x 10 columns and draw into cell i+1.
    plt.subplot(2, 10, i+1)
    # No cmap argument: colormaps are ignored for RGB image data.
    plt.imshow(npimg)
    plt.axis('off')
plt.show()

输出:
Files already downloaded and verified
Files already downloaded and verified
torch.Size([32, 3, 32, 32])
image.png

# Step 3: build the CNN.
import torch.nn.functional as F

num_classes = 10  # CIFAR-10 has 10 categories


class Model(nn.Module):
    """Small CNN classifier: three conv/pool stages followed by two linear layers.

    Each stage is an unpadded 3x3 convolution, a ReLU and a 2x2 max-pool.
    For 3x32x32 inputs the last stage yields 128 maps of 2x2, i.e. the 512
    features that ``fc1`` expects. ``forward`` returns raw scores (no
    softmax) with ``num_classes`` entries per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.pool3 = nn.MaxPool2d(kernel_size=2)

        # Classifier head.
        self.fc1 = nn.Linear(in_features=512, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Map a batch of images to per-class scores."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        # Keep the batch dimension, collapse everything else.
        features = torch.flatten(x, 1)
        hidden = F.relu(self.fc1(features))
        return self.fc2(hidden)
  
from torchinfo import summary
# Instantiate the network and move it to the selected device
# (the GPU when one is available, otherwise the CPU).
model = Model().to(device)
# Print torchinfo's overview of the model.
summary(model)

image.png

# Training setup.
loss_fn = nn.CrossEntropyLoss() # loss function
learn_rate = 1e-2   # learning rate
opt = torch.optim.SGD(model.parameters(), lr=learn_rate)    # plain SGD over all model parameters
  
# Training routine.
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader`` and report its metrics.

    Every batch is moved to the module-level ``device``, pushed through
    ``model``, scored with ``loss_fn`` and used for one ``optimizer`` step.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches that exposes
            ``.dataset`` and supports ``len()``.
        model: Network being trained; its output is arg-maxed over dim 1.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar
            tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where accuracy is the number of
        correct predictions divided by ``len(dataloader.dataset)`` and
        mean_loss is the summed batch losses divided by ``len(dataloader)``.
    """
    sample_count = len(dataloader.dataset)  # number of samples in the dataset
    batch_count = len(dataloader)           # number of batches per pass

    correct, loss_sum = 0, 0

    for X, y in dataloader:
        X = X.to(device)
        y = y.to(device)

        # Forward pass and loss.
        logits = model(X)
        batch_loss = loss_fn(logits, y)

        # Backward pass: clear stale gradients, backpropagate, update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Book-keeping for the epoch metrics.
        hits = (logits.argmax(1) == y).type(torch.float)
        correct += hits.sum().item()
        loss_sum += batch_loss.item()

    return correct / sample_count, loss_sum / batch_count
  
# Evaluation routine.
def test(dataloader, model, loss_fn):
    """Score ``model`` on ``dataloader`` without updating any parameters.

    Batches are moved to the module-level ``device`` and evaluated inside
    ``torch.no_grad()`` so no gradients are recorded.

    Args:
        dataloader: Iterable of ``(inputs, labels)`` batches that exposes
            ``.dataset`` and supports ``len()``.
        model: Network being evaluated; its output is arg-maxed over dim 1.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar
            tensor.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where accuracy is the number of
        correct predictions divided by ``len(dataloader.dataset)`` and
        mean_loss is the summed batch losses divided by ``len(dataloader)``.
    """
    sample_count = len(dataloader.dataset)  # number of samples in the dataset
    batch_count = len(dataloader)           # number of batches per pass
    hits, loss_sum = 0, 0

    # Evaluation only: gradients are neither needed nor tracked.
    with torch.no_grad():
        for batch, labels in dataloader:
            batch = batch.to(device)
            labels = labels.to(device)

            logits = model(batch)
            loss_sum += loss_fn(logits, labels).item()
            hits += (logits.argmax(1) == labels).type(torch.float).sum().item()

    return hits / sample_count, loss_sum / batch_count
  
# Main training run.
epochs = 10
# Per-epoch history; one entry is appended to each list per epoch.
train_acc, train_loss, test_acc, test_loss = [], [], [], []

for epoch in range(epochs):
    # Switch to training mode before the optimisation pass.
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dataload, model, loss_fn, opt)

    # Switch to evaluation mode before scoring on the test set.
    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dataload, model, loss_fn)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # Accuracies are fractions in [0, 1]; scale them to percentages for the log line.
    template = 'Epoch:{:2d}, 训练正确率:{:.1f}%, 训练损失率:{:.3f}, 测试正确率:{:.1f}%, 测试损失率:{:.3f}'
    print(template.format(epoch+1, epoch_train_acc * 100, epoch_train_loss, epoch_test_acc*100, epoch_test_loss))

print('Done')
  
# Visualise the results.
# Silence warnings.
import warnings
warnings.filterwarnings('ignore')   # ignore all warning messages from here on
plt.rcParams['font.sans-serif'] = ['SimHei']    # font chosen so the Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with that font
plt.rcParams['figure.dpi'] = 100    # figure resolution

# x axis: one point per epoch, 0 .. epochs-1.
epochs_range = range(epochs)

plt.figure(figsize=(12, 3))

plt.subplot(1, 2, 1)    # left panel: accuracy curves
plt.plot(epochs_range, train_acc, label='训练正确率')
plt.plot(epochs_range, test_acc, label='测试正确率')
plt.legend(loc='lower right')
plt.title('训练和测试正确率比较')

plt.subplot(1, 2, 2)    # right panel: loss curves
plt.plot(epochs_range, train_loss, label='训练损失率')
plt.plot(epochs_range, test_loss, label='测试损失率')
plt.legend(loc='upper right')
plt.title('训练和测试损失率比较')

plt.show()

# Save the model.
# torch.save does not create missing parent directories, so make sure
# ./models exists before writing into it.
import os
os.makedirs('./models', exist_ok=True)
# NOTE: this pickles the whole module object (not just a state_dict), so the
# Model class must be importable wherever the file is loaded again.
torch.save(model, './models/cnn-cifar10.pth')

image.png
再次设置epochs为50训练结果:
image.png
epochs增加到100,训练结果:
image.png
可以看到训练集和测试集的差距有点大,不太理想。做一下数据增强试试:

# The code shown so far imports only `torchvision`, never the bare name
# `transforms`, so bring it into scope here to avoid a NameError.
from torchvision import transforms

# Per-split preprocessing: the training split gets a random horizontal flip
# as augmentation before tensor conversion; the test split is only converted.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
    ]),
}

在dataset中:

# Rebuild both datasets so each split uses its own transform pipeline.
# NOTE: DataLoaders created before this point still wrap the previous dataset
# objects; re-create them afterwards for the new transforms to take effect.
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True,  transform=data_transforms['train'])
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms['test'])

运行结果:
image.png
image.png
比较漂亮了,再调整batch_size=16和epochs=20,提高了近6个百分点。
image.png
batch_size=16,epochs=50:在第20轮左右的时候,验证集的准确率基本就没有再提高了,结果和上面基本一样。
image.png

(三)总结
  1. epochs并不是越多越好,batch_size也是同样的道理。
  2. 数据增强确实可以提高模型训练的准确性。

原文地址:https://blog.csdn.net/deflag/article/details/144439207

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!