Recherche de violations dans la vidéo à l'aide de la vision par ordinateur

Supposons qu'une telle violation soit possible. Comment l'identifier ?

Nous avons à notre disposition des enregistrements de caméras de surveillance du lieu de travail de l'employé et un journal des opérations.

Nous chercherons tous les moments de l'enregistrement où le client était absent. Le réseau neuronal MobileNet et le tracker CSRT de la bibliothèque OpenCV nous y aideront, ainsi que Tesseract-OCR pour plus de commodité.

Pour trouver une personne dans l'image, nous utiliserons le réseau neuronal MobileNet. Ce réseau permet de détecter et de localiser 20 types d'objets dans une image. Pour qu'il fonctionne, il faut télécharger deux fichiers : l'architecture et les poids. Ces fichiers se trouvent dans le dépôt GitHub.

Avant d'écrire le code, nous devons installer la bibliothèque de vision par ordinateur cv2 et le package pytesseract pour traiter le texte sur les images.

!pip install opencv-python
!pip install pytesseract

Pour que pytesseract fonctionne, vous devez d'abord télécharger la distribution Tesseract-OCR à partir du site officiel et l'installer.

Commencer à préparer le traitement vidéo

Nous importons les packages et écrivons le chemin vers le dossier Tesseract-OCR dans l'environnement local:

import os

# Path of the surveillance recording to analyse (placeholder: fill in before running).
video_path = ...
# Directory where Tesseract-OCR is installed (placeholder: fill in before running).
tesseract_path = ...
# Extend PATH with the Tesseract directory for this process.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + tesseract_path

import pytesseract
import cv2
import imutils
import pandas as pd
import datetime as dt

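Créons un tableau pour stocker les résultats, ainsi que des variables pour la zone du client, la date et l'état du suivi (présence / absence) :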

# Per-frame results table; the processing loop writes one [date, tracked] row
# per frame. The column labels are reproduced exactly as in the original.
df = pd.DataFrame(columns=['', '  '])
# Client region (x, y, w, h); stays empty until it is selected on the first frame.
work_place = tuple()
# `date` is the timestamp of the current frame (None until it is read from the
# video or typed in); `tracked` tells whether a person is currently followed
# inside the client region.
date, tracked = None, False

Indiquons les chemins vers les deux fichiers du réseau neuronal, l'architecture et les poids :

# Files describing the detector: network architecture first, trained weights second.
prototxt, weights = (
    'MobileNetSSD_deploy.prototxt',
    'MobileNetSSD_deploy.caffemodel',
)

Le réseau reconnaît 20 classes d'objets, énumérées dans le dictionnaire suivant :

# Labels of the detector's output classes, keyed by class id.
# Id 0 is the background class; id 15 ('person') is the one the
# processing loop looks for.
classNames = dict(enumerate((
    'background',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)))

Fixons le seuil de confiance des détections, puis chargeons le réseau neuronal.

thr = 0.1 # confidence threshold: the processing loop only keeps detections whose confidence is above it

net = cv2.dnn.readNetFromCaffe(prototxt, weights) # load the detector from the architecture and weights files

Ouvrons la vidéo à l'aide de cv2.VideoCapture :

# Open the recording so its frames can be read one by one with cap.read().
cap = cv2.VideoCapture(video_path)

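Pour estimer le nombre d'images par seconde, lisons toutes les images de la vidéo avec la méthode .read() et comptons-les, puis divisons ce total par la durée de la vidéo :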

%%time

# Estimate the frame rate by decoding the whole recording once and dividing
# the number of frames by the (manually supplied) duration.
cap = cv2.VideoCapture(video_path)

total_frame = 0
# cap.read() returns (success, frame); success turns False at the end of the video.
while cap.read()[0]:
    total_frame += 1

# The counting pass leaves the capture positioned at the end of the stream, so
# any later cap.read() would fail immediately. Release it and reopen the file
# so the processing loop that follows starts again from the first frame.
cap.release()
cap = cv2.VideoCapture(video_path)

video_length = ... # duration of the recording in seconds (placeholder: fill in)
# Rounded to an integer because the processing loop uses it as a modulus on
# the frame index to advance the clock by one second.
fps = round(total_frame / video_length)
fps

, . 100- 2 .

, , , . , , .

def _center_inside(cx, cy, region):
    """Return True when the point (cx, cy) lies strictly inside region (x, y, w, h)."""
    rx, ry, rw, rh = region
    return rx < cx < rx + rw and ry < cy < ry + rh


# Main processing loop. For every frame of the recording it:
#   1. asks the operator (first frame only) to mark the client region and the
#      on-screen date, reading the latter with Tesseract;
#   2. advances the clock by one second every `fps` frames;
#   3. runs the MobileNet-SSD detector when nobody is tracked (and again every
#      30 seconds of video) to look for a person whose box centre is inside
#      the client region, handing that box over to a CSRT tracker;
#   4. otherwise updates the tracker and checks the box centre is still inside;
#   5. stores (date, tracked) for the frame in `df`.
# Press ESC in the preview window to stop early.
try:
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            # End of the recording (or a read error).
            break

        frame = imutils.resize(frame, width=1200)
        # Index of the next frame to be decoded; used as the frame counter.
        frame_no = cap.get(cv2.CAP_PROP_POS_FRAMES)

        # First frame only: let the operator draw the client region.
        if len(work_place) == 0:
            cv2.putText(frame, 'Set the client\'s location', (0, 90), cv2.FONT_HERSHEY_SIMPLEX,
                        2, (0, 255, 0), 2)
            work_place = cv2.selectROI('frame', frame, fromCenter=False, showCrosshair=True)
            x, y, w, h = [int(coord) for coord in work_place]

        # First frame only: read the timestamp burnt into the picture.
        if not date:
            try:
                cv2.putText(frame, 'Set the date', (0, 160), cv2.FONT_HERSHEY_SIMPLEX,
                            2, (0, 255, 0), 2)
                date_roi = cv2.selectROI('frame', frame, fromCenter=False, showCrosshair=True)
                date_x, date_y, date_w, date_h = [int(coord) for coord in date_roi]
                date_ = frame[date_y : date_y+date_h, date_x : date_x+date_w]
                # Grayscale plus a fixed binary threshold before the crop is
                # handed to the OCR engine.
                date_ = cv2.cvtColor(date_, cv2.COLOR_BGR2GRAY)
                date_ = cv2.threshold(date_, 180, 255, cv2.THRESH_BINARY)[1]
                # strptime rejects trailing characters, so surrounding
                # whitespace in the OCR output must be stripped first.
                date_text = pytesseract.image_to_string(date_).strip()
                date = dt.datetime.strptime(date_text, '%Y-%m-%d %H:%M:%S')

            except Exception:
                # Best-effort OCR: on any failure fall back to manual entry.
                # A malformed manual entry still raises ValueError.
                print('Could not read the date from the frame; '
                      'enter it as YYYY-MM-DD HH:MM:SS')
                date_ = input()
                date = dt.datetime.strptime(date_, '%Y-%m-%d %H:%M:%S')

        # One second of video has elapsed every `fps` frames.
        if frame_no % fps == 0:
            date += dt.timedelta(seconds = 1)

        if not tracked or (frame_no % (fps * 30) == 0):

            # Detection pass: the network takes a 300x300 input.
            frame_resized = cv2.resize(frame, (300, 300))
            blob = cv2.dnn.blobFromImage(frame_resized, 0.007843,
                                         (300, 300), (127.5, 127.5, 127.5), False)

            net.setInput(blob)
            detections = net.forward()
            # detections layout:
            # [0, 0, object, [0, class_id, confidence, x1, y1, x2, y2]]

            # Size of the network input, and factors to map boxes back onto
            # the displayed frame.
            cols = frame_resized.shape[1]
            rows = frame_resized.shape[0]
            heightFactor = frame.shape[0] / 300.0
            widthFactor = frame.shape[1] / 300.0

            for obj in detections[0, 0, :, :]:
                confidence = obj[2]
                if confidence <= thr:
                    continue
                if int(obj[1]) != 15:  # 15 is the 'person' class
                    continue

                # Box corners in network-input pixels, then in frame pixels.
                x1 = int(widthFactor * int(obj[3] * cols))
                y1 = int(heightFactor * int(obj[4] * rows))
                x2 = int(widthFactor * int(obj[5] * cols))
                y2 = int(heightFactor * int(obj[6] * rows))

                xCenter = x1 + (x2 - x1)/2
                yCenter = y1 + (y2 - y1)/2

                if _center_inside(xCenter, yCenter, (x, y, w, h)):
                    # A person is in the client region: start following this box.
                    tracker = cv2.TrackerCSRT_create()
                    tracker.init(frame, (x1, y1, x2 - x1, y2 - y1))
                    tracked = True
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3, 1)
                    break
                else:
                    tracked = False
        else:
            # Tracking pass: follow the previously detected person.
            _, bbox = tracker.update(frame)
            X, Y, W, H = [int(coord) for coord in bbox]

            xCenter = X + W/2
            yCenter = Y + H/2

            if _center_inside(xCenter, yCenter, (x, y, w, h)):
                tracked = True
                cv2.rectangle(frame, (X, Y), (X + W, Y + H), (255, 255, 0), 3, 1)
            else:
                tracked = False

        cv2.imshow('frame', frame)
        df.loc[frame_no, :] = [date, tracked]
        print(frame_no, date, tracked)  # frame index, timestamp, presence flag
        if cv2.waitKey(1) == 27:  # ESC
            break
finally:
    # Always free the capture and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

.read() : , , – . , , , .

. , «-- ::». , : , . date

.

Tesseract-

, . , , .

.get() 1 , , fps

, date

. tesseract, , .

: cv2.resize() cv2.dnn.blobFromImage(). , . , detections

. , 20 .

, 15. , , tracked

True . tracked

date

df

.

, . , tracked

True False, .

. , , . .

, / . , , . , .

# Collapse the per-frame rows to one row per value of the first column
# (labelled '') and keep the maximum of the other column in each group.
# The string alias 'max' is used instead of the builtin `max`: passing builtin
# reductions to agg() is deprecated in recent pandas and the alias keeps the
# current behaviour.
df_ = df.groupby('', as_index=False).agg('max')
# Write the aggregated table to an Excel workbook without the index column.
df_.to_excel('output.xlsx', index=False)

, :

. , . opencv. , , .
. .
. , , . «» , .

Le premier et le deuxième problème peuvent être résolus par des trackers basés sur l'apprentissage profond, par exemple le tracker GOTURN. Ce tracker est implémenté dans la bibliothèque OpenCV, mais son fonctionnement nécessite de télécharger des fichiers supplémentaires. Vous pouvez également utiliser le tracker populaire Re3 ou le tracker récemment introduit AcurusTrack. Le troisième problème peut être résolu en remplaçant le réseau neuronal et/ou en le réentraînant sur des images de personnes assises.

Lien vers le code .

Recherche de violations dans la vidéo à l'aide de la vision par ordinateur

More articles: