【问题标题】:How to quantify difference between frames using optical flow estimation?如何使用光流估计量化帧之间的差异?
【发布时间】:2020-06-29 18:43:01
【问题描述】:

这是从稳定的视频(无摄像机移动)中获取光流输出并将其保存为一组帧的代码

import cv2 as cv
import numpy as np

# Dense optical flow (Farneback) over a stabilised video: every pair of
# consecutive frames is turned into an HSV-coded flow image (hue = direction,
# value = normalised magnitude), shown on screen and written to disk.

# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")

# ret is False when no frame could be decoded (missing file, unsupported
# codec, empty video); first_frame is the first frame of the sequence
ret, first_frame = cap.read()
if not ret:
    cap.release()
    raise SystemExit("Could not read the first frame of 2_stable_video.avi")

# Farneback flow operates on single-channel images, so work in grayscale
prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)

# HSV image with the same dimensions as the frame, initially all zeros
mask = np.zeros_like(first_frame)

# Sets image saturation to maximum
mask[..., 1] = 255

count = 0
while cap.isOpened():
    # ret becomes False once the video is exhausted; frame is then None and
    # must not be passed on to imshow/cvtColor
    ret, frame = cap.read()
    if not ret:
        break

    # Opens a new window and displays the input frame
    cv.imshow("input", frame)

    # Converts the current frame to grayscale to match prev_gray
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    # Calculates dense optical flow by Farneback method; flow[..., 0] and
    # flow[..., 1] are the per-pixel x and y displacements between the frames
    flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)

    # Computes the magnitude and angle (radians) of the 2D vectors
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])

    # Sets image hue according to the optical flow direction
    # (radians -> degrees, halved to fit OpenCV's 0..180 hue range)
    mask[..., 0] = angle * 180 / np.pi / 2

    # Sets image value according to the optical flow magnitude, min-max
    # normalised per frame (so brightness is not comparable across frames)
    mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)

    # Converts HSV to BGR color representation
    rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)

    # Region of interest that is displayed and saved
    roi = rgb[40:150, 120:220]

    # Opens a new window and displays the output frame
    cv.imshow("dense optical flow", roi)
    # NOTE: imwrite does not create directories; frames_modified_2/ must exist
    cv.imwrite("frames_modified_2/%d.png" % count, roi)
    count += 1

    # Updates previous frame
    prev_gray = gray

    # Waits up to 1 ms for a key press; 'q' ends the loop early
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()

有人可以建议如何量化帧之间的差异吗?例如,如何根据光流估计运动的速率(speed)或速度(velocity)?

【问题讨论】:

    标签: python opencv image-processing scipy computer-vision


    【解决方案1】:

    这是一个从 .bsq 帧中获取逐帧像素平移幅度的示例。您可以修改代码,使其读取视频文件作为输入。您可能对 get_translation() 函数最感兴趣。示例:

    显示逐帧像素平移量的图表

    代码

    import numpy as np
    import argparse
    import os
    import cv2
    from matplotlib import pyplot as plt
    from matplotlib import cm
    import time
    import random
    
    # Usage: python translate_analyzer.py -p <filename.bsq>
    
    # Automatic brightness and contrast optimization with optional histogram clipping
    def automatic_brightness_and_contrast(image, clip_hist_percent=25):
        """Stretch brightness/contrast of *image* after clipping histogram tails.

        A 256-bin grayscale histogram is accumulated, ``clip_hist_percent / 2``
        percent of the pixels is discarded at each end, and the remaining gray
        range is mapped linearly onto 0..255 with ``cv2.convertScaleAbs``.

        Args:
            image: Input image; a 3-dimensional array is converted with
                ``COLOR_BGR2GRAY`` for the histogram, anything else is used
                as-is.
            clip_hist_percent: Total percentage of pixels to clip, split
                evenly between the dark and the bright end.

        Returns:
            Tuple ``(auto_result, alpha, beta)`` where ``auto_result`` is
            ``convertScaleAbs(image, alpha, beta)``. When no usable gray range
            remains after clipping (e.g. a flat image), the identity mapping
            ``alpha=1.0, beta=0.0`` is used instead of dividing by zero.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Calculate grayscale histogram
        hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
        hist_size = len(hist)

        # Cumulative distribution; float64 keeps the pixel counts exact
        accumulator = np.cumsum(hist.ravel().astype(np.float64))

        # Number of pixels to drop at each end of the histogram
        maximum = accumulator[-1]
        clip = clip_hist_percent * (maximum / 100.0) / 2.0

        # Locate left cut (bounded so it can never run off the histogram)
        minimum_gray = 0
        while minimum_gray < hist_size - 1 and accumulator[minimum_gray] < clip:
            minimum_gray += 1

        # Locate right cut (bounded so it can never wrap to negative indices)
        maximum_gray = hist_size - 1
        while maximum_gray > 0 and accumulator[maximum_gray] >= (maximum - clip):
            maximum_gray -= 1

        # Calculate alpha and beta values
        if maximum_gray > minimum_gray:
            alpha = 255 / (maximum_gray - minimum_gray)
            beta = -minimum_gray * alpha
        else:
            # Degenerate histogram: nothing to stretch, leave the image as is
            alpha = 1.0
            beta = 0.0

        auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
        return (auto_result, alpha, beta)
    
    # Draw flow
    def draw_flow(img, flow, step=30):
        """Return a BGR copy of *img* with sampled flow vectors drawn on it.

        The flow field is sampled on a regular grid with spacing ``step``
        (starting at ``step/2`` in both directions). For every sample a line
        is drawn from the grid point to the grid point displaced by the flow,
        and a small filled circle marks the grid point itself.

        Args:
            img: Image converted with ``COLOR_GRAY2BGR`` for drawing.
            flow: Array indexed as ``flow[y, x]`` yielding an (x, y)
                displacement pair.
            step: Grid spacing in pixels.

        Returns:
            The annotated BGR image.
        """
        rows, cols = img.shape[:2]

        # Grid of sample coordinates, flattened to 1-D integer arrays
        grid = np.mgrid[step/2:rows:step, step/2:cols:step]
        ys, xs = grid.reshape(2, -1).astype(int)

        # Flow components at every sample point
        dx, dy = flow[ys, xs].T

        # One (start, end) point pair per sample, rounded to integer pixels
        segments = np.vstack([xs, ys, xs + dx, ys + dy]).T.reshape(-1, 2, 2)
        segments = np.int32(segments + 0.5)

        colour = (36, 255, 12)
        canvas = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        cv2.polylines(canvas, segments, 1, colour)
        for start, _end in segments:
            cv2.circle(canvas, (start[0], start[1]), 2, colour, -1)
        return canvas
    
    # Return translation value
    def get_translation(img, flow, step=30):
        """Summarise the first flow channel of one frame pair.

        Args:
            img: Unused; kept for interface compatibility.
            flow: Array whose last axis holds the flow components; only
                channel 0 is read.
            step: Unused; kept for interface compatibility.

        Returns:
            Tuple ``(median, channel)`` where ``channel`` is ``flow[:, :, 0]``
            transposed and ``median`` is the median over all of its values.
        """
        first_channel = flow[:, :, 0].T
        return (np.median(first_channel), first_channel)
    
    # Driver: reads a raw .bsq stack of 16-bit frames, estimates the dense
    # optical flow between consecutive frames and plots (1) the per-frame
    # median of the first flow channel and (2) that channel for the last pair.

    # Get file path
    ap = argparse.ArgumentParser()
    ap.add_argument("-p", "--path", help="Path to the .bsq file")
    args = vars(ap.parse_args())

    if not args['path']:
        print('Usage: python translate_analyzer.py -p <filename.bsq>')
        raise SystemExit(1)

    # Keep the full path for file access; the bare name is only used for display
    bsq_path = args['path']
    bsq_fname = os.path.basename(bsq_path)

    if os.path.splitext(bsq_fname)[1].lower() != '.bsq':
        print('ERROR: Invalid bsq file. Select correct file.')
        raise SystemExit(1)

    # Frame geometry of the raw file: 2 bytes (uint16) per pixel
    width = 640
    height = 512
    frame_count = os.path.getsize(bsq_path) // (2 * height * width)
    if frame_count < 2:
        # Optical flow needs a pair of frames
        print('ERROR: bsq file must contain at least two frames.')
        raise SystemExit(1)

    # Region of interest (x, y, width, height) the flow is computed on
    x, y, w, h = 0, 0, 100, 512

    # Read only whole frames so a trailing partial frame cannot break reshape
    data_file = np.fromfile(bsq_path, dtype=np.uint16, count=frame_count * width * height)
    data_file = data_file.reshape((width, height, frame_count), order='F')
    data_file = np.rot90(data_file)


    def _load_frame(frame_index):
        """Return frame *frame_index* scaled down, contrast-stretched and cropped."""
        frame = data_file[:, :, frame_index].copy()
        frame //= 64
        frame = automatic_brightness_and_contrast(frame)[0]
        return frame[y:y+h, x:x+w]


    print(bsq_fname)
    prev = _load_frame(0)

    translation_data = []
    frame_direction = None
    start = time.time()
    for index in range(1, frame_count):
        data = _load_frame(index)

        flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
        translation, pixel_direction = get_translation(data, flow)
        prev = data

        cv2.imshow('flow', draw_flow(data, flow))
        cv2.waitKey(1)

        translation_data.append(translation)
        # Only the last frame pair's channel is kept for the second plot
        frame_direction = pixel_direction

    end = time.time()
    print('Time:', end - start)

    # Close the preview window before the blocking matplotlib windows open
    cv2.destroyAllWindows()

    plt.figure()
    plt.title(bsq_fname)
    plt.xlabel("Frames")
    plt.ylabel("Magnitude")
    plt.plot(translation_data)

    plt.figure()
    plt.title("Pixel Direction")
    plt.xlabel("Width")
    plt.ylabel("Height")
    plt.imshow(frame_direction.T)
    plt.colorbar(orientation='vertical')
    plt.show()
    

    【讨论】:

    • 您好,感谢您的贡献。能否说明一下,为什么您使用中位数而不是平均值来衡量运动?是因为中位数对异常值不那么敏感(更稳健)吗?
    • @petemir 没错,您可以使用均值或众数,但这取决于您的数据集
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2021-06-22
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2010-09-16
    • 2019-08-12
    相关资源
    最近更新 更多