1. Overview
This is an application built on top of target detection ("YOLOv5 training its own dataset", ONEPIECE_00's blog, CSDN) and target positioning ("Target Detection and Target Location", ONEPIECE_00's blog, CSDN). It uses the detection results to determine whether there are people on the seats. The code is modified on the basis of the target positioning post: it mainly adds detection of multiple pictures in sequence, judges the results with some simple logic, and adds a timer function. In other words, it processes and applies the detection results. Three files are explained below: detect.py for detection, site_pro.py for target positioning, and pro.py for processing the detection results.
2. Code Details
1. Target Detection
First, detect.py, which detects the pictures, needs to be modified: add an interface for changing the picture path, and rewrite the main function so that the path can be changed before the detection is executed.
# Write a function to change the path
def changes(new="yolov5-master/data/JPEGImages/01.jpg"):
    global new_
    new_ = new


# Use the variable as the default path so it is easy to modify
def parse_opt():
    parser.add_argument('--source', type=str, default=new_, help='file/dir/URL/glob, 0 for webcam')


# Wrap the path-changing function and the main function together, so the detection runs with the chosen path
def main_(opt):
    print(colorstr('detect: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
    check_requirements(exclude=('tensorboard', 'thop'))
    return run(**vars(opt))


# The parameter sou is the image path
def main(sou):
    changes(sou)
    opt = parse_opt()
    return main_(opt)
Complete detect.py code. The final output must be passed back with return, otherwise the caller will get an empty result. It is best to copy the original code into a new file, e.g. detect_2.py, and modify that.
import argparse
import sys
import time
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn

FILE = Path(__file__).absolute()
sys.path.append(FILE.parents[0].as_posix())  # add yolov5/ to path

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, colorstr, is_ascii, non_max_suppression, \
    apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path, save_one_box
from utils.plots import Annotator, colors
from utils.torch_utils import select_device, load_classifier, time_sync


@torch.no_grad()
def run(weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=[640, 640],  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        ):
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix = False, Path(w).suffix.lower()
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in ['.pt', '.onnx', '.tflite', '.pb', ''])  # backend
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1',))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                               tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    ascii = is_ascii(names)  # names are ascii (use PIL for UTF-8)

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if int8:
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)

        # NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, pil=not ascii)
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')

    # Added: pass the detections to the positioning function and return its result
    from site_pro import site
    return site(source, pred, names)


def changes(new="yolov5-master/data/JPEGImages/01.jpg"):
    global new_
    new_ = new


def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='/content/gdrive/MyDrive/yolov5-master/runs/train/use_1/weights/best.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default=new_, help='file/dir/URL/glob, 0 for webcam')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    return opt


def main_(opt):
    print(colorstr('detect: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items()))
    check_requirements(exclude=('tensorboard', 'thop'))
    return run(**vars(opt))


def main(sou):
    changes(sou)
    opt = parse_opt()
    return main_(opt)
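Since run() now ends with return site(source, pred, names), the modified script can be called from another file and its return value used directly. A minimal usage sketch (the image path is only an example; the real paths appear later in pro.py):

# Minimal usage sketch: call the modified detect script from another file
import detect_2 as t

result = t.main("yolov5-master/data/JPEGImages/01.jpg")
# result is whatever site() returns: a list of
# [center_x, center_y, confidence, class_name, 'left' or 'right'] entries
print(result)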
2. Target Positioning
This step simply changes the output into a list containing the result for each detected object and returns it in the form [center x, center y, confidence, detected class, region] (for ease of calculation, the image is for now only divided into left and right regions). For example:
[[0.5844, 0.6292, 0.8585752, 'person', 'left'], [0.6292, 0.4757, 0.8243118, 'computer', 'left'], [0.4219, 0.4757, 0.6576152, 'cup', 'right']]
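Each entry above comes from a simple conversion: the box corners (x1, y1, x2, y2) in pixels are averaged and divided by the image width and height to get a normalized center, and the region is chosen from the center's x coordinate (using the same x > 0.5 convention as site() below). A small worked example with made-up numbers:

# Made-up detection box (x1, y1, x2, y2) in pixels on a 640x480 image
box_x1, box_y1, box_x2, box_y2 = 100, 200, 300, 400
img_w, img_h = 640, 480

x = round((box_x1 + box_x2) / img_w / 2, 4)   # (100 + 300) / 640 / 2 = 0.3125
y = round((box_y1 + box_y2) / img_h / 2, 4)   # (200 + 400) / 480 / 2 = 0.625
region = 'left' if x > 0.5 else 'right'       # 0.3125 <= 0.5, so 'right'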
Complete code:
import os
from PIL import Image


def site(source, pred, names):
    img = Image.open(source)
    x1, x2 = img.size
    s2 = []
    for i1 in pred:
        s = []
        for i2 in i1.data.cpu().numpy():
            s1 = []
            s = list(i2)
            # Get the (x, y) coordinates of the center, normalized by image size
            x = s[0] = float(round((s[0] + s[2]) / x1 / 2, 4))
            y = s[1] = float(round((s[1] + s[3]) / x2 / 2, 4))
            # Region judgment
            if x > 0.5:
                w = 'left'
            elif x <= 0.5:
                w = 'right'
            s1.append(x)
            s1.append(y)
            s1.append(s[4])
            s1.append(names[int(s[5])])
            if s[4] < 0.6:
                break
            s1.append(w)
            s2.append(s1)
    return s2
3. Seat vacancy analysis
My goal is to determine whether seats are vacant from the detection results. There are three situations: someone is at the seat; no one is there but belongings have been left behind; and the seat has been completely empty for a long time. To judge between them, several images of the scene are needed at certain time intervals, and I simulate this by reading multiple photos from a folder in sequence.
# Scenario simulation: take the file names of the pictures in the folder
imagelist = os.listdir("/content/gdrive/MyDrive/yolov5-master/requirement")
print(imagelist)
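One caveat worth noting: os.listdir() returns the file names in arbitrary order, so if the photos must be processed in the order they were taken, sorting the list is safer:

# os.listdir() makes no ordering guarantee; sort so the photos are processed
# in a predictable order (e.g. chronological, if they are named that way)
imagelist = sorted(os.listdir("/content/gdrive/MyDrive/yolov5-master/requirement"))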
Then target detection is performed on each picture by passing its path, and the result is saved. The results are processed with some simple logic, for example accumulating a score per region and comparing it with a threshold for the final judgment, and a different time interval is set before detecting the next photo depending on whether a person, only objects, or nothing was detected.
# Timing function
def clock_time(x):
    a1 = time.time()
    while True:
        a2 = time.time()
        if a2 - a1 > x:
            break


for i1 in imagelist:
    s = t.main("/content/gdrive/MyDrive/yolov5-master/requirement/" + i1)
    x = 0
    # Process the detection results
    for i2 in range(len(s)):
        # right and left are accumulated separately
        if s[i2] == []:
            x = 2
        elif s[i2][3] == 'person':
            x = 0
        elif s[i2][3] in ['computer', 'person', 'phone', 'tablet phone', 'cup', 'bag', 'bag2', 'books']:
            x = 1
        rt[s[i2][4]] = rt.get(s[i2][4]) + x
    # Use different time intervals for person / objects only / empty
    if x == 0:
        clock_time(1)
    elif x == 1:
        clock_time(2)
    elif x == 2:
        clock_time(3)
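As a side note, clock_time() above waits by spinning in a loop, which keeps the CPU busy; the standard library's time.sleep() gives the same pause without busy-waiting, so the function could just as well be written as:

import time

# Equivalent pause without a busy-wait loop
def clock_time(x):
    time.sleep(x)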
Finally, based on the results obtained, we can judge the situation in different areas.
if rt['right'] > 1:
    print("right Unmanned")
else:
    print("right Someone")
if rt['left'] > 1:
    print('left Unmanned')
else:
    print('left Someone')
Complete code:
import time
import detect_2 as t
import os


# Timing function
def clock_time(x):
    a1 = time.time()
    while True:
        a2 = time.time()
        if a2 - a1 > x:
            break


def time_():
    imagelist = os.listdir("/content/gdrive/MyDrive/yolov5-master/requirement")
    print(imagelist)
    rt = {'right': 0, 'left': 0}
    # Detect the pictures in the folder in order
    for i1 in imagelist:
        # print(i1)
        s = t.main("/content/gdrive/MyDrive/yolov5-master/requirement/" + i1)
        # print(s)
        # print(len(s))
        x = 0
        # Process the detection results
        for i2 in range(len(s)):
            # right and left are accumulated separately
            if s[i2] == []:
                x = 2
            elif s[i2][3] == 'person':
                x = 0
            elif s[i2][3] in ['computer', 'person', 'phone', 'tablet phone', 'cup', 'bag', 'bag2', 'books']:
                x = 1
            rt[s[i2][4]] = rt.get(s[i2][4]) + x
        # Use different time intervals for person / objects only / empty
        if x == 0:
            clock_time(1)
        elif x == 1:
            clock_time(2)
        elif x == 2:
            clock_time(3)
    if rt['right'] > 1:
        print("right Unmanned")
    else:
        print("right Someone")
    if rt['left'] > 1:
        print('left Unmanned')
    else:
        print('left Someone')


time_()
3. Summary
Generally speaking, this is an application of target detection used to analyze and judge whether people or objects are present in an area. The code is relatively simple, and what I have written here is only a basic version; for practical use it still needs to be improved according to your own situation.