Using Python to train a network for garbage classification (ResNet-18)

Keywords: Programming JSON Python network Attribute

The data set contains six kinds of garbage, namely, cardboard, glass, metal, paper, plastic and other waste products. The amount of data is small and only for learning.

Preparing the dataset labels involves splitting the data into a training set and a test set and writing the label files. Code: utils.py

import os
import shutil
import json

# Root directory that holds one sub-folder per garbage category
# (cardboard, glass, metal, paper, plastic, trash).
# Adjust this to your own dataset path.
# NOTE(review): the original line was garbled by the article formatting;
# the intent is a plain path string pointing at the dataset root.
path = "e:/Dataset/garbage_classification"

# One class per sub-folder; a class's label is its index in this list,
# so the listing order must stay fixed between runs.
classes = [garbage for garbage in os.listdir(path)]

# Create the output folders for the train/val image copies (no-op if present).
os.makedirs(os.path.join(os.getcwd(), 'train'), exist_ok=True)
os.makedirs(os.path.join(os.getcwd(), 'val'), exist_ok=True)

# Collect annotation records first and dump each split as ONE JSON array.
# (The original code json.dumps()-ed every record separately and concatenated
# them into the file, producing invalid JSON that json.load() in data.py
# cannot parse back.)
train_records = []
val_records = []
for garbage in classes:
    s = 0
    for imgname in os.listdir(os.path.join(path, garbage)):
        record = {'name': imgname, 'label': classes.index(garbage)}
        if s % 7 != 0:
            # 6 out of every 7 images go to the training split.
            train_records.append(record)
            shutil.copy(os.path.join(path, garbage, imgname),
                        os.path.join(os.getcwd(), 'train'))
        else:
            # Every 7th image goes to the validation split.
            val_records.append(record)
            shutil.copy(os.path.join(path, garbage, imgname),
                        os.path.join(os.getcwd(), 'val'))
        s += 1

# Write both label files; 'with' guarantees the handles are closed
# (the original never closed f and g).
with open("garbage_train.json", 'w') as f:
    json.dump(train_records, f)
with open("garbage_val.json", 'w') as g:
    json.dump(val_records, g)

Running the above code will generate the following folder.

Next, we write a dataset preprocessing class in data.py. ``root`` is the root directory of the dataset prepared above, and ``datajson`` is the path to one of the two JSON label files.

from PIL import Image
import torch
import os
import json
class MyDataset(torch.utils.data.Dataset):
    """Image dataset backed by a JSON label file.

    The JSON file is expected to hold a list of records of the form
    ``{"name": <image filename>, "label": <int class index>}`` (as produced
    by utils.py); the image files themselves live directly under ``root``.
    """

    def __init__(self, root, datajson, transform=None, target_transform=None):
        """
        Args:
            root: directory containing the image files.
            datajson: path to the JSON annotation file.
            transform: optional callable applied to each PIL image.
            target_transform: optional callable for labels (kept for API
                compatibility; not applied in __getitem__).
        """
        super(MyDataset, self).__init__()
        # 'with' closes the file handle (the original version leaked it).
        with open(datajson, 'r') as fh:
            load_dict = json.load(fh)
        # (filename, int label) pairs, one per annotation record.
        imgs = [(line['name'], int(line['label'])) for line in load_dict]

        self.root = root
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return the (optionally transformed) image and its integer label."""
        fn, label = self.imgs[index]
        # Force RGB so grayscale/RGBA files all yield 3-channel tensors.
        img = Image.open(os.path.join(self.root, fn)).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        """Number of images (distinct from the number of loader batches)."""
        return len(self.imgs)

Next, define the ResNet network in resnet.py. One note: because the dataset is small and many pictures are under 224 pixels, I use 112-pixel inputs. Several ResNet variants are provided below; I use the simplest, ResNet-18.

import torch
import torch.nn as nn

class BasicBlock(nn.Module):
    """Two-convolution residual block used by ResNet-18 and ResNet-34.

    ``expansion`` is 1 here (BottleNeck uses 4); downstream code relies on
    it to size the classifier and the shortcut projections.
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        expanded = out_channels * BasicBlock.expansion

        # Main path: 3x3 conv (possibly strided) -> BN -> ReLU -> 3x3 conv -> BN.
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, expanded, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(expanded)
        )

        # Identity shortcut by default; switch to a 1x1 projection whenever
        # the main path changes the spatial size or the channel count.
        if stride != 1 or in_channels != expanded:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, expanded, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(expanded)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.residual_function(x)
        identity = self.shortcut(x)
        return nn.ReLU(inplace=True)(residual + identity)

class BottleNeck(nn.Module):
    """Three-convolution residual block for ResNet-50/101/152.

    Squeezes channels with a 1x1 conv, applies a (possibly strided) 3x3
    conv, then expands by ``expansion`` (4) with another 1x1 conv.
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        expanded = out_channels * BottleNeck.expansion

        # Main path: 1x1 reduce -> 3x3 (strided) -> 1x1 expand, BN after each.
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, stride=stride, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, expanded, kernel_size=1, bias=False),
            nn.BatchNorm2d(expanded),
        )

        # 1x1 projection shortcut when shape/channels change, else identity.
        if stride != 1 or in_channels != expanded:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, expanded, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(expanded)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        main = self.residual_function(x)
        skip = self.shortcut(x)
        return nn.ReLU(inplace=True)(main + skip)
    
class ResNet(nn.Module):
    """ResNet backbone sized for 112x112 inputs with ``num_classes`` outputs.

    Args:
        block: residual block class (BasicBlock or BottleNeck).
        num_block: sequence with the number of blocks in each of the 4 stages.
        num_classes: size of the final classification layer (default 6).
    """

    def __init__(self, block, num_block, num_classes=6):
        super().__init__()

        self.in_channels = 64

        # Stem: note stride=1 (not the paper's 2) because the expected
        # input here is 112x112 rather than 224x224.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=1, bias=False),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True))
        # Four residual stages; the first keeps stride 1, the later ones
        # halve the resolution while doubling the channel count.
        self.conv2_x = self._make_layer(block, 64, num_block[0], 1)
        self.conv3_x = self._make_layer(block, 128, num_block[1], 2)
        self.conv4_x = self._make_layer(block, 256, num_block[2], 2)
        self.conv5_x = self._make_layer(block, 512, num_block[3], 2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Stack ``num_blocks`` residual blocks into one stage.

        Only the first block may downsample (it gets ``stride``); all the
        remaining blocks use stride 1. ``self.in_channels`` is threaded
        through so the following stage knows its input width.
        """
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        out = self.conv1(x)
        for stage in (self.conv2_x, self.conv3_x, self.conv4_x, self.conv5_x):
            out = stage(out)
        out = self.avg_pool(out)
        # Flatten (N, C, 1, 1) -> (N, C) before the classifier.
        out = out.view(out.size(0), -1)
        return self.fc(out)

def resnet18():
    """Return a ResNet-18 model (BasicBlock, [2, 2, 2, 2] blocks per stage)."""
    return ResNet(BasicBlock, [2, 2, 2, 2])

def resnet34():
    """Return a ResNet-34 model (BasicBlock, [3, 4, 6, 3] blocks per stage)."""
    return ResNet(BasicBlock, [3, 4, 6, 3])

def resnet50():
    """Return a ResNet-50 model (BottleNeck, [3, 4, 6, 3] blocks per stage)."""
    return ResNet(BottleNeck, [3, 4, 6, 3])

def resnet101():
    """Return a ResNet-101 model (BottleNeck, [3, 4, 23, 3] blocks per stage)."""
    return ResNet(BottleNeck, [3, 4, 23, 3])

def resnet152():
    """Return a ResNet-152 model (BottleNeck, [3, 8, 36, 3] blocks per stage)."""
    return ResNet(BottleNeck, [3, 8, 36, 3])

If you want to train a different number of classes, modify num_classes=6. If you want to train on 224-pixel images, change the stride parameter of the first convolution in self.conv1 to 2.

As I said above, my input image size is 112, so the stride is set to 1.

Set the global hyperparameters in the settings module before the official training:

import os
from datetime import datetime

# Directory in which weight checkpoint files are saved.
CHECKPOINT_PATH = 'checkpoint'

# Total number of training epochs.
EPOCH = 200 
# Epochs at which the MultiStepLR schedule decays the learning rate.
MILESTONES = [60, 120, 160]

#initial learning rate
#INIT_LR = 0.1

# Timestamp of this run (ISO format), usable for unique log/checkpoint names.
TIME_NOW = datetime.now().isoformat()

# TensorBoard log directory.
LOG_DIR = 'runs'

# Save a "regular" weights snapshot every SAVE_EPOCH epochs.
SAVE_EPOCH = 10

Next is direct training, train.py

import argparse
import os
import sys
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader
from torch.autograd import Variable

from tensorboardX import SummaryWriter

import glabol_settings as settings
def get_network(args, use_gpu=True):
    """Build the network named by ``args.net``.

    Args:
        args: parsed argparse namespace with a ``net`` attribute.
        use_gpu: move the model to CUDA when True.

    Returns:
        The instantiated (and optionally CUDA-resident) network.

    Raises:
        ValueError: if ``args.net`` is not a supported network name.
    """
    if args.net == 'resnet18':
        from models.resnet import resnet18
        net = resnet18()
    elif args.net == 'resnet34':
        from models.resnet import resnet34
        net = resnet34()
    elif args.net == 'resnet50':
        from models.resnet import resnet50
        net = resnet50()
    elif args.net == 'resnet101':
        from models.resnet import resnet101
        net = resnet101()
    elif args.net == 'resnet152':
        from models.resnet import resnet152
        net = resnet152()
    else:
        # Fail loudly instead of silently leaving 'net' unbound.
        raise ValueError('unsupported network type: {}'.format(args.net))

    if use_gpu:
        net = net.cuda()
    # The original fell off the end and returned None, which made the
    # caller crash later at net.train(); return the model explicitly.
    return net
from data import *
path=os.getcwd()
class WarmUpLR(_LRScheduler):
    """Linear warmup learning-rate scheduler.

    For the first ``total_iters`` steps the learning rate grows linearly
    from 0 towards each parameter group's base LR.

    Args:
        optimizer: the wrapped optimizer (e.g. SGD).
        total_iters: number of warmup iterations (batches, not epochs).
        last_epoch: index of the last step; -1 for a fresh start.
    """

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Scale every base LR by last_epoch / total_iters (epsilon-guarded)."""
        scale = self.last_epoch / (self.total_iters + 1e-8)
        return [scale * base_lr for base_lr in self.base_lrs]

def train(epoch):
    """Run one training epoch over ``train_set`` (a DataLoader).

    Relies on module-level globals set up in __main__: net, train_set,
    optimizer, loss_function, warmup_scheduler, writer and args.
    """
    net.train()
    for batch_index, (images, labels) in enumerate(train_set):
        # During the warmup epoch(s) the LR is updated once per batch.
        # NOTE(review): calling scheduler.step() before optimizer.step()
        # triggers a warning on recent PyTorch versions — confirm the
        # intended ordering.
        if epoch <= args.warm:
            warmup_scheduler.step()
        print('label:',labels.shape)
        images = Variable(images)
        labels = Variable(labels)

        labels = labels.cuda()
        images = images.cuda()

        optimizer.zero_grad()
        outputs = net(images)

        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # Global step index used for the per-iteration tensorboard scalars.
        n_iter = (epoch - 1) * len(train_set) + batch_index + 1

        # Log the gradient norms of the final (fully connected) layer.
        last_layer = list(net.children())[-1]
        for name, para in last_layer.named_parameters():
            if 'weight' in name:
                writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter)
            if 'bias' in name:
                writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter)

        print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
            loss.item(),
            optimizer.param_groups[0]['lr'],
            epoch=epoch,
            trained_samples=batch_index * args.b + len(images),
            total_samples=len(train_set.dataset)
        ))

        #update training loss for each iteration
        writer.add_scalar('Train/loss', loss.item(), n_iter)

    # Histogram every parameter tensor once per epoch; the tag is split at
    # the name's final dot into <layer>/<weight|bias> via os.path.splitext.
    for name, param in net.named_parameters():
        layer, attr = os.path.splitext(name)
        attr = attr[1:]
        writer.add_histogram("{}/{}".format(layer, attr), param, epoch)

def eval_training(epoch):
    """Evaluate ``net`` on ``test_set`` and log loss/accuracy to tensorboard.

    Relies on module-level globals: net, test_set, loss_function, writer.

    Returns:
        The test-set top-1 accuracy as a 0-dim float tensor.
    """
    net.eval()

    test_loss = 0.0  # accumulated cross-entropy over the whole test set
    correct = 0.0

    # no_grad: pure inference — skip autograd bookkeeping and save memory.
    with torch.no_grad():
        for (images, labels) in test_set:
            images = Variable(images)
            labels = Variable(labels)

            images = images.cuda()
            labels = labels.cuda()

            outputs = net(images)
            loss = loss_function(outputs, labels)
            test_loss += loss.item()
            # Top-1 prediction per sample.
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum()

    # "Adduracy" typo fixed to "Accuracy" in the printed report.
    print('Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(
        test_loss / len(test_set.dataset),
        correct.float() / len(test_set.dataset)
    ))
    print()

    #add informations to tensorboard
    writer.add_scalar('Test/Average loss', test_loss / len(test_set.dataset), epoch)
    # NOTE: tag renamed from 'Test/Adduracy'; old tensorboard runs will show
    # the accuracy curve under the old tag.
    writer.add_scalar('Test/Accuracy', correct.float() / len(test_set.dataset), epoch)

    return correct.float() / len(test_set.dataset)

if __name__ == '__main__':
    
    # Command-line configuration for the training run.
    # NOTE(review): type=bool on -gpu and -s is misleading — argparse turns
    # any non-empty string (including "False") into True.
    parser = argparse.ArgumentParser()
    parser.add_argument('-net', type=str, default="resnet18", help='net type')
    parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not')
    parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader')
    parser.add_argument('-b', type=int, default=8, help='batch size for dataloader')
    parser.add_argument('-s', type=bool, default=True, help='whether shuffle the dataset')
    parser.add_argument('-warm', type=int, default=1, help='warm up training phase')
    parser.add_argument('-lr', type=float, default=0.001, help='initial learning rate')
    args = parser.parse_args()

    net = get_network(args, use_gpu=args.gpu)
        
    #data preprocessing:
    # NOTE(review): these mean/std values look like the CIFAR-100 statistics,
    # not values computed from the garbage dataset — confirm they are intended.
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = transforms.Compose([
        transforms.RandomCrop(112),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = transforms.Compose([
        transforms.RandomCrop(112),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])

    # Datasets
    # The '*' placeholders must be replaced with the real train/val paths.
    train_set = MyDataset(root='****/train_img',
                          datajson='**/train.json', transform=train_transforms)
    test_set = MyDataset(root='**/val_img',
                         datajson='**/val.json', transform=test_transforms)
    # NOTE(review): the loaders hard-code shuffle/num_workers/batch_size and
    # ignore the -s/-w/-b command line arguments defined above.
    train_set = DataLoader(
        train_set, shuffle=True, num_workers=1, batch_size=8)
    test_set= DataLoader(
        test_set, shuffle=True, num_workers=1, batch_size=8)
    
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay
    # One warmup "epoch" equals this many batches for WarmUpLR.
    iter_per_epoch = len(train_set)
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)
    checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, 'lj')

    #use tensorboard
    if not os.path.exists(settings.LOG_DIR):
        os.mkdir(settings.LOG_DIR)
    log_dir = os.path.join(
            settings.LOG_DIR, args.net,'lj')
    print(log_dir)
    writer = SummaryWriter(log_dir)
    input_tensor = torch.Tensor(8, 3, 112, 112).cuda()
    print("done")
    #writer.add_graph(net, Variable(input_tensor, requires_grad=True))

    #create checkpoint folder to save model
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    # Filename template filled per save: <net>-<epoch>-<best|regular>.pth
    checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth')

    best_add = 0.0  # best validation accuracy seen so far
    # NOTE(review): range(1, EPOCH) runs EPOCH-1 epochs (199, not 200).
    for epoch in range(1, settings.EPOCH):
        # After the warmup phase, step the milestone LR schedule per epoch.
        if epoch > args.warm:
            train_scheduler.step(epoch)

        train(epoch)
        add = eval_training(epoch)

        #start to save best performance model after learning rate decay to 0.01 
        if epoch > settings.MILESTONES[1] and best_add < add:
            torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='best'))
            best_add = add
            continue

        # Periodic snapshot every SAVE_EPOCH epochs (skipped on epochs where
        # a new best model was just saved, because of the 'continue' above).
        if not epoch % settings.SAVE_EPOCH:
            torch.save(net.state_dict(), checkpoint_path.format(net=args.net, epoch=epoch, type='regular'))

    writer.close()

After training, the model is saved in the checkpoint path as resnet18.pth.

Test set to test the effect of a single demo, demo.py

import argparse
#from dataset import *

#from skimage import io
from matplotlib import pyplot as plt

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import gloal_settings as settings

from PIL import Image
import torchvision.transforms as transforms
import glabol_settings as settings
def get_network(args, use_gpu=True):
    """Build the network named by ``args.net``.

    Args:
        args: parsed argparse namespace with a ``net`` attribute.
        use_gpu: move the model to CUDA when True.

    Returns:
        The instantiated (and optionally CUDA-resident) network.

    Raises:
        ValueError: if ``args.net`` is not a supported network name.
    """
    if args.net == 'resnet18':
        from models.resnet import resnet18
        net = resnet18()
    elif args.net == 'resnet34':
        from models.resnet import resnet34
        net = resnet34()
    elif args.net == 'resnet50':
        from models.resnet import resnet50
        net = resnet50()
    elif args.net == 'resnet101':
        from models.resnet import resnet101
        net = resnet101()
    elif args.net == 'resnet152':
        from models.resnet import resnet152
        net = resnet152()
    else:
        # Fail loudly instead of silently leaving 'net' unbound.
        raise ValueError('unsupported network type: {}'.format(args.net))

    if use_gpu:
        net = net.cuda()
    # The original fell off the end and returned None, which made the
    # caller crash later at net.load_state_dict(); return the model.
    return net
if __name__ == '__main__':

    # Single-image inference demo using a trained checkpoint.
    parser = argparse.ArgumentParser()
    parser.add_argument('-net', type=str, default="resnet18", help='net type')
    parser.add_argument('-weights', type=str, default="checkpoint/resnet18/lj/resnet18.pth", help='the weights file you want to test')
    parser.add_argument('-gpu', type=bool, default=True, help='use gpu or not')
    #parser.add_argument('-w', type=int, default=2, help='number of workers for dataloader')
    #parser.add_argument('-b', type=int, default=16, help='batch size for dataloader')
    #parser.add_argument('-s', type=bool, default=True, help='whether shuffle the dataset')
    args = parser.parse_args()

    net = get_network(args)



    # NOTE(review): the second positional argument of load_state_dict is
    # 'strict', not a GPU flag — passing args.gpu here only works by accident.
    net.load_state_dict(torch.load(args.weights), args.gpu)
    print(net)
    net.eval()

    correct_1 = 0.0
    correct_5 = 0.0
    total = 0
    transform_test = transforms.Compose([
        transforms.RandomCrop(112),
        transforms.ToTensor(),
        # NOTE(review): CIFAR100_TRAIN_MEAN/STD are not defined in the
        # settings module shown earlier — confirm they exist, or reuse the
        # mean/stdv lists from train.py.
        transforms.Normalize(settings.CIFAR100_TRAIN_MEAN,settings.CIFAR100_TRAIN_STD)
    ])
    imgs=Image.open("test.jpg")
    img=transform_test(imgs)
    # Add the batch dimension: (C, H, W) -> (1, C, H, W).
    img=img.unsqueeze(0)
    image = Variable(img).cuda()
    print(image.shape)
    import time
    #label = Variable(img).cuda()
    # Time a single forward pass (inference latency).
    start=time.time()
    output = net(image)
    print(time.time()-start)
    print(output)
    # Top-1 class index from the raw logits.
    _, pred = output.topk(1, 1, largest=True, sorted=True)
    # NOTE(review): class_car is never defined in this script — it should be
    # the list of class names in the same order produced by utils.py.
    print(class_car[pred[0].item()])
    imgs.show()

Let me test one!

Identified as cardboard. If you are interested, please leave a message. I will send the data set and training model pth file later.

 

Posted by Goon on Thu, 31 Oct 2019 00:56:05 -0700