基于CNN的人像是否戴口罩的分类问题

admin • 2022-03-21 08:10 • 人工智能

由于疫情的影响，口罩已经成为人们日常生活中的必需品，在某些场合更是必须佩戴口罩，口罩检测也就成了需要解决的问题。今天，我们就来看看，如何基于卷积神经网络做一个能够检测人是否戴口罩的demo。

1.首先搭建数据集，结构如下：

Facialmask
        |
        |
        已佩戴口罩
        |       |
        |       |
        |       图片
        |
        |
        未佩戴口罩
        |       |
        |       |
        |       图片
        |
        |
        image.csv

其中image.csv由下面的数据集预处理程序自动生成（该程序保存为 image预处理.py，后面的训练脚本会从中导入 load_facialmask 和 normalize）：

# -*-coding:utf-8-*-
# @Author: Phantom
# @Email: [email protected]
# @编译环境：windows 10 + python3.8
# @IDE：Pycharm2021.1.3
import csv
import glob
import os
import random

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Ask TensorFlow to enable memory growth on every visible GPU.
# The loop body never runs when no GPU is listed.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)


# 加载处理数据集

def load_csv(root, filename, name2label):
    """Return the image paths and integer labels listed in ``root/filename``.

    If the CSV file does not exist yet it is created first: every ``*.png``,
    ``*.jpg`` and ``*.jpeg`` file found in ``root/<class name>/`` (for each key
    of ``name2label``) is collected, the list is shuffled, and one
    ``path,label`` row per image is written. The file is then read back, so
    the order stored in the CSV is the order returned on every later call.

    Args:
        root: dataset root directory.
        filename: name of the CSV file inside ``root``.
        name2label: mapping from class (sub-directory) name to integer label.

    Returns:
        ``(images, labels)``: a list of image paths and a parallel list of
        integer labels.
    """
    csv_path = os.path.join(root, filename)

    if not os.path.exists(csv_path):
        # No CSV yet: scan every class directory and build it.
        images = []
        for name in name2label:
            # The dot is part of each pattern so that only real extensions
            # match (a bare '*png' would also match a file called 'notpng').
            for pattern in ('*.png', '*.jpg', '*.jpeg'):
                images += glob.glob(os.path.join(root, name, pattern))
        # Dataset summary: number of images followed by the path list.
        print(len(images), images)
        random.shuffle(images)  # randomize the order once; the CSV freezes it
        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The parent directory name is the class name.
                name = os.path.basename(os.path.dirname(img))
                writer.writerow([img, name2label[name]])
            print('written into csv file:', filename)

    # The CSV exists at this point: read it back.
    images, labels = [], []
    # newline='' is what the csv module expects for files it reads.
    with open(csv_path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue  # tolerate blank lines instead of failing to unpack
            img, label = row
            images.append(img)
            labels.append(int(label))
    return images, labels


def load_facialmask(root, mode='train'):
    """Return one split of the dataset together with the class-name table.

    Every sub-directory of ``root`` is treated as a class; the classes are
    numbered 0, 1, ... in sorted name order so the mapping is stable between
    runs. The image list comes from ``load_csv(root, 'image.csv', ...)`` and
    is cut by position: ``'train'`` gets the first 60%, ``'val'`` the next
    20%, and any other ``mode`` value the last 20%.

    Returns:
        ``(images, labels, name2label)``.
    """
    # Build the class-name -> integer table from the directory names.
    name2label = {}
    for entry in sorted(os.listdir(root)):
        if os.path.isdir(os.path.join(root, entry)):
            name2label[entry] = len(name2label)

    images, labels = load_csv(root, 'image.csv', name2label)

    def _bounds(count):
        # Start/stop positions of the requested split for a list of `count`.
        if mode == 'train':
            return 0, int(0.6 * count)
        if mode == 'val':
            return int(0.6 * count), int(0.8 * count)
        return int(0.8 * count), count

    img_start, img_stop = _bounds(len(images))
    lab_start, lab_stop = _bounds(len(labels))
    images = images[img_start:img_stop]
    labels = labels[lab_start:lab_stop]

    return images, labels, name2label


# Mean and standard deviation used by normalize()/denormalize(), one value
# per image channel. Such statistics are computed from real data, e.g.
# ImageNet.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])

def normalize(x, mean=img_mean, std=img_std):
    """Standardize ``x``: subtract ``mean``, then divide by ``std``."""
    centered = x - mean
    return centered / std

def denormalize(x, mean=img_mean, std=img_std):
    """Undo ``normalize``: multiply ``x`` by ``std``, then add ``mean``."""
    rescaled = x * std
    return rescaled + mean


def preprocess(x, y):
    """Turn one (image path, label) pair into a normalized sample.

    ``x`` is the path of an image file and ``y`` its integer class index.
    The image is decoded with 3 channels, resized to 244x244, randomly
    flipped left/right, randomly cropped to 224x224x3, scaled to [0, 1] and
    standardized with ``normalize``. The label is converted to a tensor.
    """
    raw_bytes = tf.io.read_file(x)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    image = tf.image.resize(image, [244, 244])

    # Data augmentation: horizontal flip, then a random 224x224 crop.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_crop(image, [224, 224, 3])

    # Scale to [0, 1] first, then standardize.
    image = normalize(tf.cast(image, dtype=tf.float32) / 255.)
    label = tf.convert_to_tensor(y)

    return image, label


def main():
    """Load the training split and write image batches to TensorBoard.

    Prints the split's paths, labels and class table, builds a shuffled,
    preprocessed dataset in batches of 32, and writes up to 9 images of each
    batch to the 'logs' summary directory, pausing 3 seconds per batch.
    """
    import time

    # Load the training split of the mask dataset.
    images, labels, table = load_facialmask('Facialmask', 'train')
    print('images:', len(images), images)
    print('labels:', len(labels), labels)
    print('table:', table)

    # images are path strings, labels are integers.
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    dataset = dataset.shuffle(1000)
    dataset = dataset.map(preprocess)
    dataset = dataset.batch(32)

    # Summary writer used for TensorBoard visualization.
    summary_writer = tf.summary.create_file_writer('logs')
    for step, (batch_images, _) in enumerate(dataset):
        with summary_writer.as_default():
            # Undo the standardization so the images look natural.
            restored = denormalize(batch_images)
            tf.summary.image('img', restored, step=step, max_outputs=9)
            time.sleep(3)


if __name__ == '__main__':
    main()

2.训练：数据集包含330张戴口罩图片和330张未戴口罩图片；使用TensorFlow 2框架，网络采用Keras自带的、在ImageNet上预训练的VGG19作为冻结的特征提取器，最后再接一个全连接层进行二分类。

# -*-coding:utf-8-*-
# @Author: Phantom
# @Email: [email protected]
# @编译环境：windows 10 + python3.8
# @IDE：Pycharm2021.1.3
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2.cv2 as cv
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from image预处理 import load_facialmask, normalize

# Fix the TensorFlow and NumPy random seeds.
tf.random.set_seed(1234)
np.random.seed(1234)

# transfer

# Ask TensorFlow to enable memory growth on every visible GPU.
# The loop body never runs when no GPU is listed.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)


def preprocess(x, y, training=True):
    """Turn one (image path, label) pair into an (image, one-hot label) sample.

    Args:
        x: path of the image file.
        y: integer class index; it is one-hot encoded with depth 2.
        training: when True (the default, identical to the previous
            behavior) the image is resized to 244x244, randomly flipped
            left/right and randomly cropped to 224x224. When False the image
            is resized straight to 224x224 with no randomness, which is the
            same path the single-image inference code uses.

    Returns:
        ``(image, label)``: the standardized 224x224x3 image and the one-hot
        label.
    """
    x = tf.io.read_file(x)  # read the file at the given path
    x = tf.image.decode_jpeg(x, channels=3)  # decode to 3 channels

    if training:
        # Data augmentation is only meaningful for the training split.
        x = tf.image.resize(x, [244, 244])
        x = tf.image.random_flip_left_right(x)
        x = tf.image.random_crop(x, [224, 224, 3])
    else:
        # Deterministic path: evaluation results must not depend on random
        # flips or crops.
        x = tf.image.resize(x, [224, 224])

    x = tf.cast(x, dtype=tf.float32) / 255.
    # Scale to [0, 1] first, then standardize.
    x = normalize(x)
    y = tf.convert_to_tensor(y)
    y = tf.one_hot(y, depth=2)

    return x, y


def _preprocess_eval(x, y):
    """Deterministic variant of ``preprocess`` for validation and test data."""
    return preprocess(x, y, training=False)


batchsz = 128

images, labels, _ = load_facialmask('Facialmask', mode='train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))
# Shuffle the (path, label) pairs before decoding so the shuffle buffer holds
# short strings instead of decoded images.
db_train = db_train.shuffle(500).map(preprocess).batch(batchsz)

images2, labels2, _ = load_facialmask('Facialmask', mode='val')
db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
db_val = db_val.map(_preprocess_eval).batch(batchsz)

images3, labels3, _ = load_facialmask('Facialmask', mode='test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(_preprocess_eval).batch(batchsz)

# One path is used for the existence check, for saving and for loading, so
# that a model saved by a previous run is always the one that is found.
model_path = 'facialmask.h5'

if not os.path.exists(model_path):
    # No saved model: build one on top of the pretrained VGG19 and train it.
    net = keras.applications.VGG19(weights='imagenet', include_top=False, pooling='max')
    net.trainable = False  # keep the pretrained weights frozen

    # Frozen VGG19 features followed by a 2-unit dense layer that outputs
    # logits (the loss below is configured with from_logits=True).
    newnet = Sequential([
        net,
        layers.Dense(2)
    ])

    newnet.build(input_shape=(None, 224, 224, 3))
    newnet.summary()

    # Stop training once val_accuracy has failed to improve by at least
    # 0.001 for 5 consecutive epochs.
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        min_delta=0.001,
        patience=5
    )

    newnet.compile(optimizer=optimizers.Adam(1e-3),
                   loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                   metrics=['accuracy'])

    newnet.fit(db_train, epochs=100, validation_data=db_val, validation_freq=1, callbacks=[early_stopping])

    newnet.evaluate(db_test)
    newnet.save(model_path)
    print('saved total model.')
else:
    # A saved model exists: reuse it instead of training again.
    newnet = tf.keras.models.load_model(model_path)
    print('load model from file!')


# Class names indexed by predicted label. The order has to match the label
# numbering produced by load_facialmask, which numbers the class directories
# in sorted name order.
table = ['已佩戴口罩', '未佩戴口罩']

# Image to classify; the same file is shown in a window and fed to the model.
image_path = '2.png'

x = tf.io.read_file(image_path)  # read the file at the given path
img = cv.imread(image_path)
cv.imshow('3', img)
x = tf.image.decode_jpeg(x, channels=3)  # decode to 3 channels
x = tf.image.resize(x, [224, 224])  # resize to the network input size

x = tf.cast(x, dtype=tf.float32) / 255.
# Scale to [0, 1] first, then standardize.
x = normalize(x)
x = tf.reshape(x, [1, 224, 224, 3])  # add the batch dimension

logits = newnet(x)
prob = tf.nn.softmax(logits, axis=1)
pred = tf.argmax(prob, axis=1)
pred = tf.cast(pred, dtype=tf.int32)
# pred has one entry per batch element; take that entry explicitly instead
# of converting a one-element tensor to int.
num = int(pred[0])
print(table[num])
cv.waitKey(0)  # keep the image window open until a key is pressed

训练结果如下：

最终触发了early stopping提前结束训练，测试集正确率达到94%，效果还不错。

本图文内容来源于网友网络收集整理提供，作为学习参考使用，版权属于原作者。

THE END

深度学习计算机视觉

二维码

pytorch-TensorFlow-tensorboard工具如何启动？相对路径和绝对路径的命令行启动；如何改变启动端口？

< <上一篇

手撕 CNN 经典网络之 VGGNet（PyTorch实战篇）

下一篇>>

搜索内容

基于CNN的人像是否戴口罩的分类问题

最新文章

分类

标签云