基于CNN的人像是否戴口罩的分类问题
由于疫情的影响,口罩已经成为人们日常生活中的必需品,在某些场合更是必须佩戴口罩,口罩检测也因此成为一个实际的需求。今天我们就来看看,如何基于卷积神经网络做一个能够检测人是否戴口罩的demo。
1.首先搭建数据集,结构如下:
Facialmask
├── 已佩戴口罩
│   └── 图片
├── 未佩戴口罩
│   └── 图片
└── image.csv
其中image.csv由下面的数据集预处理程序自动生成:
# -*-coding:utf-8-*-
# @Author: Phantom
# @Email: [email protected]
# @编译环境:windows 10 + python3.8
# @IDE:Pycharm2021.1.3
import csv
import glob
import os
import random
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
# Turn on memory growth for every GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# Load and prepare the dataset index
def load_csv(root, filename, name2label):
    """Return the image paths and integer labels listed in ``root/filename``.

    If the CSV index does not exist yet, it is created first: every
    ``*.png`` / ``*.jpg`` / ``*.jpeg`` file found in ``root/<class name>``
    (one sub-directory per key of ``name2label``) is collected, the list is
    shuffled once, and one ``path,label`` row is written per image.  The CSV
    is then read back, so later calls reuse the stored order.

    Args:
        root: Dataset root directory.
        filename: Name of the CSV index file inside ``root``.
        name2label: Mapping from class (sub-directory) name to integer label.

    Returns:
        A tuple ``(images, labels)`` of two parallel lists: image paths
        (``str``) and labels (``int``).
    """
    csv_path = os.path.join(root, filename)

    if not os.path.exists(csv_path):
        # No index yet: scan each class directory and build it.
        images = []
        for name in name2label:
            class_dir = os.path.join(root, name)
            # Match the full extension (".png"), not just a trailing "png".
            for pattern in ('*.png', '*.jpg', '*.jpeg'):
                images += glob.glob(os.path.join(class_dir, pattern))

        # Report what was found: count followed by the path list.
        print(len(images), images)
        random.shuffle(images)

        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The class name is the directory that directly holds the file.
                name = os.path.basename(os.path.dirname(img))
                writer.writerow([img, name2label[name]])
        print('written into csv file:', filename)

    # The index exists at this point; read it back.
    images, labels = [], []
    with open(csv_path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Tolerate blank lines (e.g. a hand-edited file).
                continue
            img, label = row
            images.append(img)
            labels.append(int(label))

    return images, labels
def load_facialmask(root, mode='train'):
    """Return one split of the dataset stored under ``root``.

    Every sub-directory of ``root`` is treated as a class; classes are
    numbered 0, 1, ... in sorted directory-name order.  Paths and labels come
    from ``load_csv(root, 'image.csv', name2label)`` and are then sliced:
    ``'train'`` takes the first 60%, ``'val'`` the next 20%, and any other
    ``mode`` value the final 20%.

    Args:
        root: Dataset root directory.
        mode: ``'train'``, ``'val'``, or anything else for the test split.

    Returns:
        A tuple ``(images, labels, name2label)``.
    """
    # Sorted traversal keeps the name -> label mapping fixed between runs;
    # plain files in the root are ignored.
    class_names = [
        entry
        for entry in sorted(os.listdir(os.path.join(root)))
        if os.path.isdir(os.path.join(root, entry))
    ]
    name2label = {entry: index for index, entry in enumerate(class_names)}

    images, labels = load_csv(root, 'image.csv', name2label)

    def _take_split(seq):
        # Cut points at 60% and 80% of the sequence length.
        lower = int(0.6 * len(seq))
        upper = int(0.8 * len(seq))
        if mode == 'train':
            return seq[:lower]
        if mode == 'val':
            return seq[lower:upper]
        return seq[upper:]

    return _take_split(images), _take_split(labels), name2label
# Per-channel mean and standard deviation used by normalize()/denormalize().
# They are meant to be computed from real data (e.g. ImageNet).
img_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
img_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)
def normalize(x, mean=img_mean, std=img_std):
    """Standardize ``x`` element-wise as ``(x - mean) / std``.

    Args:
        x: Value to standardize.
        mean: Value subtracted from ``x``; defaults to ``img_mean``.
        std: Value the difference is divided by; defaults to ``img_std``.

    Returns:
        The standardized value.
    """
    return (x - mean) / std
def denormalize(x, mean=img_mean, std=img_std):
    """Invert ``normalize``: compute ``x * std + mean`` element-wise.

    Args:
        x: Standardized value.
        mean: Value added back; defaults to ``img_mean``.
        std: Value ``x`` is multiplied by; defaults to ``img_std``.

    Returns:
        The de-standardized value.
    """
    return x * std + mean
def preprocess(x, y):
    """Load one image from disk and turn it into a normalized training sample.

    Args:
        x: Path of the image file, as accepted by ``tf.io.read_file``.
        y: Numeric label of the image.

    Returns:
        A tuple ``(image, label)``: ``image`` is a randomly flipped and
        cropped 224x224x3 float32 tensor, scaled to 0..1 and then passed
        through ``normalize``; ``label`` is ``y`` converted to a tensor.
    """
    raw_bytes = tf.io.read_file(x)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Resize slightly larger than the target so the random crop has room.
    image = tf.image.resize(image, [244, 244])

    # Data augmentation: random horizontal flip, then a random 224x224 crop.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_crop(image, [224, 224, 3])

    # Scale 0..255 -> 0..1 before standardizing.
    image = tf.cast(image, dtype=tf.float32) / 255.
    image = normalize(image)

    label = tf.convert_to_tensor(y)
    return image, label
def main():
    """Load the training split and log its batches as TensorBoard images.

    Prints the loaded paths, labels and class table, builds a shuffled,
    batched ``tf.data`` pipeline over them, and writes up to 9 de-normalized
    images per batch to the ``logs`` summary directory, pausing 3 seconds
    after each batch.
    """
    import time

    # Load the mask dataset, training split only.
    paths, targets, class_table = load_facialmask('Facialmask', 'train')
    print('images:', len(paths), paths)
    print('labels:', len(targets), targets)
    print('table:', class_table)

    # paths are strings, targets are integers.
    pipeline = tf.data.Dataset.from_tensor_slices((paths, targets))
    pipeline = pipeline.shuffle(1000)
    pipeline = pipeline.map(preprocess)
    pipeline = pipeline.batch(32)

    # Summary writer used for the TensorBoard visualization.
    summary_writer = tf.summary.create_file_writer('logs')

    for step, (batch_images, _) in enumerate(pipeline):
        with summary_writer.as_default():
            # Undo the normalization so the logged images look natural.
            viewable = denormalize(batch_images)
            tf.summary.image('img', viewable, step=step, max_outputs=9)
            time.sleep(3)


if __name__ == '__main__':
    main()
2.训练:数据集共收集了330张戴口罩图片和330张未戴口罩图片,使用TensorFlow 2框架,网络采用TF2自带的VGG19(ImageNet预训练权重),最后再接一个全连接层。
# -*-coding:utf-8-*-
# @Author: Phantom
# @Email: [email protected]
# @编译环境:windows 10 + python3.8
# @IDE:Pycharm2021.1.3
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2.cv2 as cv
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from image预处理 import load_facialmask, normalize
# Seed the NumPy and TensorFlow random generators with a fixed value.
np.random.seed(1234)
tf.random.set_seed(1234)

# Turn on memory growth for every GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
def preprocess(x, y):
    """Load one image from disk and build a (image, one-hot label) sample.

    Args:
        x: Path of the image file, as accepted by ``tf.io.read_file``.
        y: Numeric label of the image.

    Returns:
        A tuple ``(image, label)``: ``image`` is a randomly flipped and
        cropped 224x224x3 float32 tensor, scaled to 0..1 and then passed
        through ``normalize``; ``label`` is ``y`` one-hot encoded with
        depth 2.
    """
    raw_bytes = tf.io.read_file(x)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Resize slightly larger than the target so the random crop has room.
    image = tf.image.resize(image, [244, 244])

    # Data augmentation: random horizontal flip, then a random 224x224 crop.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_crop(image, [224, 224, 3])

    # Scale 0..255 -> 0..1 before standardizing.
    image = tf.cast(image, dtype=tf.float32) / 255.
    image = normalize(image)

    # Two classes, so the label becomes a length-2 one-hot vector.
    label = tf.one_hot(tf.convert_to_tensor(y), depth=2)
    return image, label
batchsz = 128


def _build_split(mode, shuffle_buffer=None):
    # Load one split and wrap it in a preprocessed, batched tf.data pipeline.
    # The pipeline is shuffled (after mapping) only when a buffer size is given.
    paths, targets, table = load_facialmask('Facialmask', mode=mode)
    dataset = tf.data.Dataset.from_tensor_slices((paths, targets))
    dataset = dataset.map(preprocess)
    if shuffle_buffer is not None:
        dataset = dataset.shuffle(shuffle_buffer)
    return paths, targets, table, dataset.batch(batchsz)


# Training split: shuffled with a buffer of 500.
images, labels, _, db_train = _build_split('train', shuffle_buffer=500)
# Validation and test splits: same preprocessing, no shuffling.
images2, labels2, _, db_val = _build_split('val')
images3, labels3, _, db_test = _build_split('test')
# Train the classifier once and cache it on disk; later runs load the cache.
# The existence check uses the same path that save() and load_model() use, so
# the check and the cached file can never disagree.
model_path = 'facialmask.h5'

if not os.path.exists(model_path):
    # Backbone: VGG19 with ImageNet weights, classifier head removed, global
    # max pooling on top.  It is frozen, so only the new Dense layer trains.
    net = keras.applications.VGG19(weights='imagenet', include_top=False, pooling='max')
    net.trainable = False

    # New head: one Dense layer with 2 outputs and no activation, i.e. logits.
    newnet = Sequential([
        net,
        layers.Dense(2)
    ])
    newnet.build(input_shape=(None, 224, 224, 3))
    newnet.summary()

    # Stop when validation accuracy has not improved by at least 0.001
    # for 5 consecutive epochs.
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        min_delta=0.001,
        patience=5
    )

    # from_logits=True matches the activation-free Dense head above.
    newnet.compile(optimizer=optimizers.Adam(1e-3),
                   loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                   metrics=['accuracy'])
    newnet.fit(db_train, epochs=100, validation_data=db_val, validation_freq=1, callbacks=[early_stopping])
    newnet.evaluate(db_test)

    newnet.save(model_path)
    print('saved total model.')
else:
    newnet = tf.keras.models.load_model(model_path)
    print('load model from file!')
# Classify the single image '2.png' and print the predicted class name.
# table[i] is the display name for label i.
# NOTE(review): presumably this order matches the sorted class-directory order
# used by load_facialmask -- confirm against the dataset folder.
table = ['已佩戴口罩', '未佩戴口罩']

x = tf.io.read_file('2.png')  # raw file bytes for the model input
img = cv.imread('2.png')      # separate OpenCV copy, used only for display
cv.imshow('3', img)

# Same preprocessing as training, minus the random augmentation.
x = tf.image.decode_jpeg(x, channels=3)
x = tf.image.resize(x, [224, 224])
x = tf.cast(x, dtype=tf.float32) / 255.
x = normalize(x)
x = tf.reshape(x, [1, 224, 224, 3])  # add the batch dimension

logits = newnet(x)
prob = tf.nn.softmax(logits, axis=1)
pred = tf.argmax(prob, axis=1)
pred = tf.cast(pred, dtype=tf.int32)

# pred has shape [1]; index the single element explicitly rather than
# converting a rank-1 tensor to int, which relies on NumPy's deprecated
# ndim>0 array-to-scalar conversion.
num = int(pred[0])
print(table[num])

# Keep the preview window open until a key is pressed.
cv.waitKey(0)
训练结果如下:
训练最终触发了early stopping而提前结束,测试集正确率达到94%,效果还不错。
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
二维码