从编码图像和视频中提取 DCT 系数答案

【问题标题】：Extracting DCT coefficients from encoded images and video从编码图像和视频中提取 DCT 系数
【发布时间】：2011-05-27 02:05:54
【问题描述】：

有没有办法从编码的图像和视频中轻松提取 DCT 系数（和量化参数）？任何解码器软件都必须使用它们来解码块 DCT 编码的图像和视频。所以我很确定解码器知道它们是什么。有没有办法将它们暴露给使用解码器的人？

我正在实施一些直接在 DCT 域中工作的视频质量评估算法。目前，我的大部分代码都使用 OpenCV，所以如果有人知道使用该框架的解决方案，那就太好了。我不介意使用其他库（也许是 libjpeg，但这似乎只适用于静止图像），但我主要关心的是尽可能少地做特定于格式的工作（我不想重新发明轮子并编写我自己的解码器）。我希望能够打开 OpenCV 可以打开的任何视频/图像（H.264、MPEG、JPEG 等），如果它是块 DCT 编码的，以获得 DCT 系数。

在最坏的情况下，我知道我可以编写自己的块 DCT 代码，通过它运行解压缩的帧/图像，然后我会回到 DCT 域。这不是一个优雅的解决方案，我希望我能做得更好。

目前，我使用相当常见的 OpenCV 样板来打开图像：

IplImage *image = cvLoadImage(filename);
// Run quality assessment metric

我用于视频的代码同样简单：

CvCapture *capture = cvCaptureFromAVI(filename);    
while (cvGrabFrame(capture))
{
    IplImage *frame = cvRetrieveFrame(capture);
    // Run quality assessment metric on frame
}
cvReleaseCapture(&capture);

在这两种情况下，我都得到了 BGR 格式的 3 通道 IplImage。有什么方法可以得到 DCT 系数吗？

【问题讨论】：

标签： image-processing opencv video-processing dct

【解决方案1】：

嗯，我做了一些阅读，我最初的问题似乎是一厢情愿的例子。

基本上，不可能从 H.264 视频帧中获取 DCT 系数，原因很简单，即 H.264 doesn't use DCT。它使用不同的变换（整数变换）。接下来，该变换的系数不一定会逐帧改变——H.264 更智能，因为它将帧分割成片。应该可以通过特殊的解码器获取这些系数，但我怀疑 OpenCV 是否会为用户公开它。

对于 JPEG，情况要好一些。正如我所怀疑的，libjpeg 为您公开了 DCT 系数。我写了一个小应用程序来证明它可以工作（最后的来源）。它使用每个块的 DC 项制作一个新图像。因为 DC 项等于块平均值（经过适当缩放后），所以 DC 图像是输入 JPEG 图像的下采样版本。

编辑：源中的固定缩放

原始图像 (512 x 512)：

DC 图像 (64x64)：亮度 Cr Cb RGB

源码（C++）：

#include <stdio.h>
#include <assert.h>

#include <cv.h>    
#include <highgui.h>

extern "C"
{
#include "jpeglib.h"
#include <setjmp.h>
}

#define DEBUG 0
#define OUTPUT_IMAGES 1

/*
 * Extract the DC terms from the specified component.
 */
IplImage *
extract_dc(j_decompress_ptr cinfo, jvirt_barray_ptr *coeffs, int ci)
{
    jpeg_component_info *ci_ptr = &cinfo->comp_info[ci];
    CvSize size = cvSize(ci_ptr->width_in_blocks, ci_ptr->height_in_blocks);
    IplImage *dc = cvCreateImage(size, IPL_DEPTH_8U, 1);
    assert(dc != NULL);

    JQUANT_TBL *tbl = ci_ptr->quant_table;
    UINT16 dc_quant = tbl->quantval[0];

#if DEBUG
    printf("DCT method: %x\n", cinfo->dct_method);
    printf
    (
        "component: %d (%d x %d blocks) sampling: (%d x %d)\n", 
        ci, 
        ci_ptr->width_in_blocks, 
        ci_ptr->height_in_blocks,
        ci_ptr->h_samp_factor, 
        ci_ptr->v_samp_factor
    );

    printf("quantization table: %d\n", ci);
    for (int i = 0; i < DCTSIZE2; ++i)
    {
        printf("% 4d ", (int)(tbl->quantval[i]));
        if ((i + 1) % 8 == 0)
            printf("\n");
    }

    printf("raw DC coefficients:\n");
#endif

    JBLOCKARRAY buf =
    (cinfo->mem->access_virt_barray)
    (
        (j_common_ptr)cinfo,
        coeffs[ci],
        0,
        ci_ptr->v_samp_factor,
        FALSE
    );
    for (int sf = 0; (JDIMENSION)sf < ci_ptr->height_in_blocks; ++sf)
    {
        for (JDIMENSION b = 0; b < ci_ptr->width_in_blocks; ++b)
        {
            int intensity = 0;

            intensity = buf[sf][b][0]*dc_quant/DCTSIZE + 128;
            intensity = MAX(0,   intensity);
            intensity = MIN(255, intensity);

            cvSet2D(dc, sf, (int)b, cvScalar(intensity));

#if DEBUG
            printf("% 2d ", buf[sf][b][0]);                        
#endif
        }
#if DEBUG
        printf("\n");
#endif
    }

    return dc;

}

IplImage *upscale_chroma(IplImage *quarter, CvSize full_size)
{
    IplImage *full = cvCreateImage(full_size, IPL_DEPTH_8U, 1);
    cvResize(quarter, full, CV_INTER_NN);
    return full;
}

GLOBAL(int)
read_JPEG_file (char * filename, IplImage **dc)
{
  /* This struct contains the JPEG decompression parameters and pointers to
   * working space (which is allocated as needed by the JPEG library).
   */
  struct jpeg_decompress_struct cinfo;

  struct jpeg_error_mgr jerr;
  /* More stuff */
  FILE * infile;        /* source file */

  /* In this example we want to open the input file before doing anything else,
   * so that the setjmp() error recovery below can assume the file is open.
   * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
   * requires it in order to read binary files.
   */

  if ((infile = fopen(filename, "rb")) == NULL) {
    fprintf(stderr, "can't open %s\n", filename);
    return 0;
  }

  /* Step 1: allocate and initialize JPEG decompression object */

  cinfo.err = jpeg_std_error(&jerr);

  /* Now we can initialize the JPEG decompression object. */
  jpeg_create_decompress(&cinfo);

  /* Step 2: specify data source (eg, a file) */

  jpeg_stdio_src(&cinfo, infile);

  /* Step 3: read file parameters with jpeg_read_header() */

  (void) jpeg_read_header(&cinfo, TRUE);
  /* We can ignore the return value from jpeg_read_header since
   *   (a) suspension is not possible with the stdio data source, and
   *   (b) we passed TRUE to reject a tables-only JPEG file as an error.
   * See libjpeg.txt for more info.
   */

  /* Step 4: set parameters for decompression */

  /* In this example, we don't need to change any of the defaults set by
   * jpeg_read_header(), so we do nothing here.
   */

  jvirt_barray_ptr *coeffs = jpeg_read_coefficients(&cinfo);

  IplImage *y    = extract_dc(&cinfo, coeffs, 0);
  IplImage *cb_q = extract_dc(&cinfo, coeffs, 1);
  IplImage *cr_q = extract_dc(&cinfo, coeffs, 2);

  IplImage *cb = upscale_chroma(cb_q, cvGetSize(y));
  IplImage *cr = upscale_chroma(cr_q, cvGetSize(y));

  cvReleaseImage(&cb_q);
  cvReleaseImage(&cr_q);

#if OUTPUT_IMAGES
  cvSaveImage("y.png",   y);
  cvSaveImage("cb.png", cb);
  cvSaveImage("cr.png", cr);
#endif

  *dc = cvCreateImage(cvGetSize(y), IPL_DEPTH_8U, 3);
  assert(dc != NULL);

  cvMerge(y, cr, cb, NULL, *dc);

  cvReleaseImage(&y);
  cvReleaseImage(&cb);
  cvReleaseImage(&cr);

  /* Step 7: Finish decompression */

  (void) jpeg_finish_decompress(&cinfo);
  /* We can ignore the return value since suspension is not possible
   * with the stdio data source.
   */

  /* Step 8: Release JPEG decompression object */

  /* This is an important step since it will release a good deal of memory. */
  jpeg_destroy_decompress(&cinfo);

  fclose(infile);

  return 1;
}

int 
main(int argc, char **argv)
{
    int ret = 0;
    if (argc != 2)
    {
        fprintf(stderr, "usage: %s filename.jpg\n", argv[0]);
        return 1;
    }
    IplImage *dc = NULL;
    ret = read_JPEG_file(argv[1], &dc);
    assert(dc != NULL);

    IplImage *rgb = cvCreateImage(cvGetSize(dc), IPL_DEPTH_8U, 3);
    cvCvtColor(dc, rgb, CV_YCrCb2RGB);

#if OUTPUT_IMAGES
    cvSaveImage("rgb.png", rgb);
#else
    cvNamedWindow("DC", CV_WINDOW_AUTOSIZE); 
    cvShowImage("DC", rgb);
    cvWaitKey(0);
#endif

    cvReleaseImage(&dc);
    cvReleaseImage(&rgb);

    return 0;
}

【讨论】：

这个 DC_SIZE 是什么以及它来自哪里。当我编译你的源代码时，我得到一个错误 main_read.c:85:48: error: ‘DC_SIZE’ is not declared in this scope
我认为这是一个错字。如果您查看编辑历史记录，您会发现它在之前的编辑中是 DCTSIZE。我现在没有机会确认，但当我确认时，我会更新我的答案。感谢您指出这个问题。
DCTSIZE 实际上是正确的。经过一些试验，我可以确认。

【解决方案2】：

您可以使用 libjpeg 提取 jpeg 文件的 dct 数据，但对于 h.264 视频文件，我找不到任何开源代码你 dct 数据（实际上是整数 dct 数据）。但是您可以使用 JM、JSVM 或 x264 等 h.264 开源软件。在这两个源文件中，你必须找到它们使用dct函数的具体函数，并将其更改为你想要的形式，以获得你的输出dct数据。

图片： 使用以下代码，在read_jpeg_file( infilename, v, quant_tbl )、v 和quant_tbl 之后将分别拥有您的 jpeg 图像的dct data 和quantization table。

我使用 Qvector 来存储我的输出数据，将其更改为您喜欢的 c++ 数组列表。

#include <iostream>
#include <stdio.h>
#include <jpeglib.h>
#include <stdlib.h>
#include <setjmp.h>
#include <fstream>

#include <QVector>

int read_jpeg_file( char *filename, QVector<QVector<int> > &dct_coeff, QVector<unsigned short> &quant_tbl)
{
    struct jpeg_decompress_struct cinfo;
    struct jpeg_error_mgr jerr;
    FILE * infile;

    if ((infile = fopen(filename, "rb")) == NULL) {
      fprintf(stderr, "can't open %s\n", filename);
      return 0;
    }

    cinfo.err = jpeg_std_error(&jerr);
    jpeg_create_decompress(&cinfo);
    jpeg_stdio_src(&cinfo, infile);
    (void) jpeg_read_header(&cinfo, TRUE);

    jvirt_barray_ptr *coeffs_array = jpeg_read_coefficients(&cinfo);
    for (int ci = 0; ci < 1; ci++)
    {
        JBLOCKARRAY buffer_one;
        JCOEFPTR blockptr_one;
        jpeg_component_info* compptr_one;
        compptr_one = cinfo.comp_info + ci;

        for (int by = 0; by < compptr_one->height_in_blocks; by++)
        {
            buffer_one = (cinfo.mem->access_virt_barray)((j_common_ptr)&cinfo, coeffs_array[ci], by, (JDIMENSION)1, FALSE);
            for (int bx = 0; bx < compptr_one->width_in_blocks; bx++)
            {
                blockptr_one = buffer_one[0][bx];
                QVector<int> tmp;
                for (int bi = 0; bi < 64; bi++)
                {
                    tmp.append(blockptr_one[bi]);
                }
                dct_coeff.push_back(tmp);
            }
        }
    }


    // coantization table
    j_decompress_ptr dec_cinfo  = (j_decompress_ptr) &cinfo;
    jpeg_component_info *ci_ptr = &dec_cinfo->comp_info[0];
    JQUANT_TBL *tbl = ci_ptr->quant_table;

    for(int ci =0 ; ci < 64; ci++){
        quant_tbl.append(tbl->quantval[ci]);
    }

    return 1;
}

int main()
{
    QVector<QVector<int> > v;
    QVector<unsigned short> quant_tbl;
    char *infilename = "your_image.jpg";

    std::ofstream out;
    out.open("out_dct.txt");


    if( read_jpeg_file( infilename, v, quant_tbl ) > 0 ){

        for(int j = 0; j < v.size(); j++ ){
                for (int i = 0; i < v[0].size(); ++i){
                    out << v[j][i] << "\t";
            }
            out << "---------------" << std::endl;
        }

        out << "\n\n\n" << std::string(10,'-') << std::endl;
        out << "\nQauntization Table:" << std::endl;
        for(int i = 0; i < quant_tbl.size(); i++ ){
            out << quant_tbl[i] << "\t";
        }
    }
    else{
        std::cout << "Can not read, Returned With Error";
        return -1;
    }

    out.close();

return 0;
}

【讨论】：

仅供参考，这不适用于所有 JPEG 图像这是该功能的简单版本，您有时需要根据样本因素获取多于一个的行