【问题标题】:How to properly calculate the cross-correlation?如何正确计算互相关?
【发布时间】:2017-02-19 01:44:11
【问题描述】:

我编写了一个程序,(希望)计算两个信号之间的互相关。尽管我已经很好地搜索了应该如何执行计算,但我无法弄清楚一些重要的细节。我特别关心平均计算。似乎有些算法利用整个数据集的平均值来执行每个班次(或延迟)的相关性计算。换句话说,他们使用恒定的平均值。我什至发现了一些只计算一次分母的算法,将其用作其余延迟的常数值。但是,我认为平均值和分母都应该迭代计算,只考虑叠加范围内的数据。因此,我为这个程序编写了两个版本。它们似乎在较小的延迟下产生了非常相似的结果。我想知道哪个是正确的。

迭代平均:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

FILE *input1, *input2, *output;
int m = 0, n = 0, t;
float *VarA, *VarB, *Results, *Results2;

void open_inputs_and_output();

void count_and_check_lines();

void allocate_memory();

void read_data();

void allocate_memory2();

void write_output();

int main()
{
    float SumAverageA = 0, SumAverageB = 0, AverageA, AverageB, SubA, SubB, SumAB = 0, SumAs = 0, SumBs = 0, Correl;
    int p = 0, i, delay;

    open_inputs_and_output();

    count_and_check_lines();

    rewind(input1);
    rewind(input2);

    allocate_memory();

    read_data();

    fclose(input1);
    fclose(input2);

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n");
    scanf("%i", &p);

    if(p < -1)
    {
        printf("Bad number!\n");
        exit(1);
    }
    else if(p == -1)
        t = n;
    else
        t = p;

    allocate_memory2();

    printf("\nWait...\n\n");

    for(delay = 0; delay < t; delay++)
    {
        for(i = delay; i < n; i ++)
        {
            SumAverageA += VarA[i];
            SumAverageB += VarB[(i - delay)];
        }

        AverageA = SumAverageA / (n - delay);
        AverageB = SumAverageB / (n - delay);

        for(i = delay; i < n; i++)
        {
            SubA = VarA[i] - AverageA;
            SubB = VarB[(i - delay)] - AverageB;
            SumAB += (SubA * SubB);
            SumAs += (SubA * SubA);
            SumBs += (SubB * SubB);
        }

        Correl = SumAB / (sqrt(SumAs * SumBs));

        Results[delay] = Correl;

        SumAverageA = 0;
        SumAverageB = 0;
        SumAB = 0;
        SumAs = 0;
        SumBs = 0;

        for(i = delay; i < n; i++)
        {
            SubB = VarB[i] - AverageB;
            SubA = VarA[(i - delay)] - AverageA;
            SumAB += (SubA * SubB);
            SumAs += (SubA * SubA);
            SumBs += (SubB * SubB);
        }

        Correl = SumAB / (sqrt(SumAs * SumBs));

        Results2[delay] = Correl;

        SumAverageA = 0;
        SumAverageB = 0;
        SumAB = 0;
        SumAs = 0;
        SumBs = 0;
    }

    printf("Calculations performed.\n");

    free(VarA);
    free(VarB);

    write_output();

    free(Results);
    free(Results2);

    fclose(output);

    return 0;
}

void open_inputs_and_output()
{
    input1 = fopen("C:\\...\\test.txt","r");

    if (input1 == NULL)
    {
        printf("Error! Could not open input 1.\n");
        exit(1);
    }
    else
        printf("Input1 opening: OK.\n");

    input2 = fopen("C:\\...\\test2.txt","r");

    if (input2 == NULL)
    {
        printf("Error! Could not open input 2.\n");
        exit(1);
    }
    else
        printf("Input2 opening: OK.\n");

    output = fopen("C:\\...\\out.txt","w");

    if (output == NULL)
    {
        printf("Error! Could not open output.\n");
        exit(1);
    }
    else
        printf("Output opening: OK.\n");
}

void count_and_check_lines()
{
    float k;

    while(fscanf(input1,"%f",&k) == 1)
        n++;

    printf("n = %i\n", n);

    while(fscanf(input2,"%f",&k) == 1)
        m++;

    printf("m = %i\n", m);

    if(m != n)
    {
        printf("Error: Number of rows does not match!\n");
        exit(1);
    }
    else
        printf("Number of rows matches.\n");
}

void allocate_memory()
{
    VarA = calloc(n, sizeof(float));

    if(VarA == NULL)
    {
        printf("Could not allocate memory for VarA.\n");
        exit(1);
    }

    VarB = calloc(m, sizeof(float));

    if(VarA == NULL)
    {
        printf("Could not allocate memory for VarB.\n");
        exit(1);
    }
}

void read_data()
{
    int i;

    for(i = 0; i < n; i++)
        fscanf(input1,"%f",&VarA[i]);

    printf("Data A successfully read.\n");

    for(i = 0; i < m; i++)
        fscanf(input2,"%f",&VarB[i]);

    printf("Data B successfully read.\n");
}

void allocate_memory2()
{
    Results = calloc(t, sizeof(float));

    if(Results == NULL)
    {
        printf("Could not allocate memory for Results.\n");
        exit(1);
    }

    Results2 = calloc(t, sizeof(float));

    if(Results2 == NULL)
    {
        printf("Could not allocate memory for Results2.\n");
        exit(1);
    }
}

void write_output()
{
    int i;

    for(i = t - 1; i > 0; i--)
        fprintf(output,"-%i %f\n", i , Results2[i]);

    for(i = 0; i < t; i++)
        fprintf(output,"%i %f\n", i , Results[i]);

    printf("Results written.\n");
}

恒定平均值:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

FILE *input1, *input2, *output;
int m = 0, n = 0, t;
float *VarA, *VarB, *Results, *Results2;

void open_inputs_and_output();

void count_and_check_lines();

void allocate_memory();

void read_data();

void allocate_memory2();

void write_output();

int main()
{
    float SumAverageA = 0, SumAverageB = 0, AverageA, AverageB, SubA, SubB, SumAB = 0, SumAs = 0, SumBs = 0, Correl;
    int p = 0, i, delay;

    open_inputs_and_output();

    count_and_check_lines();

    rewind(input1);
    rewind(input2);

    allocate_memory();

    read_data();

    fclose(input1);
    fclose(input2);

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n");
    scanf("%i", &p);

    if(p < -1)
    {
        printf("Bad number!\n");
        exit(1);
    }
    else if(p == -1)
        t = n;
    else
        t = p;

    allocate_memory2();

    printf("\nWait...\n\n");

    for(i = 0; i < n; i ++)
    {
        SumAverageA += VarA[i];
        SumAverageB += VarB[i];
    }

    AverageA = SumAverageA / n;
    AverageB = SumAverageB / n;

    for(delay = 0; delay < t; delay++)
    {
        for(i = delay; i < n; i++)
        {
            SubA = VarA[i] - AverageA;
            SubB = VarB[(i - delay)] - AverageB;
            SumAB += (SubA * SubB);
            SumAs += (SubA * SubA);
            SumBs += (SubB * SubB);
        }

        Correl = SumAB / (sqrt(SumAs * SumBs));

        Results[delay] = Correl;

        SumAverageA = 0;
        SumAverageB = 0;
        SumAB = 0;
        SumAs = 0;
        SumBs = 0;

        for(i = delay; i < n; i++)
        {
            SubB = VarB[i] - AverageB;
            SubA = VarA[(i - delay)] - AverageA;
            SumAB += (SubA * SubB);
            SumAs += (SubA * SubA);
            SumBs += (SubB * SubB);
        }

        Correl = SumAB / (sqrt(SumAs * SumBs));

        Results2[delay] = Correl;

        SumAverageA = 0;
        SumAverageB = 0;
        SumAB = 0;
        SumAs = 0;
        SumBs = 0;
    }

    printf("Calculations performed.\n");

    free(VarA);
    free(VarB);

    write_output();

    free(Results);
    free(Results2);

    fclose(output);

    return 0;
}

void open_inputs_and_output()
{
    input1 = fopen("C:\\...\\test.txt","r");

    if (input1 == NULL)
    {
        printf("Error! Could not open input 1.\n");
        exit(1);
    }
    else
        printf("Input1 opening: OK.\n");

    input2 = fopen("C:\\...\\test2.txt","r");

    if (input2 == NULL)
    {
        printf("Error! Could not open input 2.\n");
        exit(1);
    }
    else
        printf("Input2 opening: OK.\n");

    output = fopen("C:\\...\\out.txt","w");

    if (output == NULL)
    {
        printf("Error! Could not open output.\n");
        exit(1);
    }
    else
        printf("Output opening: OK.\n");
}

void count_and_check_lines()
{
    float k;

    while(fscanf(input1,"%f",&k) == 1)
        n++;

    printf("n = %i\n", n);

    while(fscanf(input2,"%f",&k) == 1)
        m++;

    printf("m = %i\n", m);

    if(m != n)
    {
        printf("Error: Number of rows does not match!\n");
        exit(1);
    }
    else
        printf("Number of rows matches.\n");
}

void allocate_memory()
{
    VarA = calloc(n, sizeof(float));

    if(VarA == NULL)
    {
        printf("Could not allocate memory for VarA.\n");
        exit(1);
    }

    VarB = calloc(m, sizeof(float));

    if(VarA == NULL)
    {
        printf("Could not allocate memory for VarB.\n");
        exit(1);
    }
}

void read_data()
{
    int i;

    for(i = 0; i < n; i++)
        fscanf(input1,"%f",&VarA[i]);

    printf("Data A successfully read.\n");

    for(i = 0; i < m; i++)
        fscanf(input2,"%f",&VarB[i]);

    printf("Data B successfully read.\n");
}

void allocate_memory2()
{
    Results = calloc(t, sizeof(float));

    if(Results == NULL)
    {
        printf("Could not allocate memory for Results.\n");
        exit(1);
    }

    Results2 = calloc(t, sizeof(float));

    if(Results2 == NULL)
    {
        printf("Could not allocate memory for Results2.\n");
        exit(1);
    }
}

void write_output()
{
    int i;

    for(i = t - 1; i > 0; i--)
        fprintf(output,"-%i %f\n", i , Results2[i]);

    for(i = 0; i < t; i++)
        fprintf(output,"%i %f\n", i , Results[i]);

    printf("Results written.\n");
}

参考资料:

http://www.jot.fm/issues/issue_2010_03/column2.pdf

http://paulbourke.net/miscellaneous/correlate/

【问题讨论】:

  • 欢迎来到 Stack Overflow。很抱歉,您在 5 个多小时内没有收到任何回复;这是相对不寻常的。由于您似乎已经找到了一些以各种方式做事的算法,也许答案是“当算法是应该使用的算法时,每个都是正确的”,而您的问题是您不确定应该在您的算法中使用哪种算法情况。您阅读了哪些参考资料?他们在算法何时适用时说了些什么?他们有没有谈到替代品以及为什么你不应该使用另一个? (请在您的问题中添加额外信息,而不是作为评论。)
  • 他们对替代方法没有多说。我的方法只是基于对相关方程和互相关概念的分析的逻辑推论。它可能是正确的,但也可能是不正确的。数据处理和统计并不完全是我的领域。

标签: c statistics signal-processing cross-correlation


【解决方案1】:

如果您的流程是wide sense stationary,则平均值不会随时间而变化。如果进程也是ergodic,那么可以通过计算单个时间序列的平均值来获得这些平均值。在这种情况下,您不妨使用所有可用的样本来尽可能准确地估计平均值。这自然会导致您的“恒定平均”实施。

另一方面,如果您的过程不是广义上的平稳和遍历的,那么获得对其局部均值的良好估计可能是一个更大的挑战。在过程大致静止的较小时间窗口内估计平均值可能是一种合理的方法(类似于您的“迭代平均”实现)。请注意,还有其他方法,它们的适用性取决于特定应用和特定信号的属性。

然后您可能想知道如何知道您的流程是否是广义静止的。不幸的是,除非您对这些过程是如何生成的非常了解,否则您通常能做的最好的事情就是在这些过程是广义平稳的假设下工作,然后尝试反驳该假设(通过观察超出预期范围的结果; 见statistical hypothesis testing)。

【讨论】:

  • 谢谢。它使事情更清楚。我正在处理一种蛋白质的分子动力学轨迹,它在开放状态和封闭状态之间波动(寿命从 5 到几十纳秒不等。考虑到在一个大的(但不足以导致蛋白质展开)时间尺度上平均值应该是恒定的,我相信测量的变量可以被认为是遍历的,但我不确定 WSS。因为在较短的时间尺度上,平均值确实会发生变化,但如果数据是在较大的时间尺度上累积的,那么平均值往往是恒定的.
猜你喜欢
  • 1970-01-01
  • 2011-10-22
  • 2015-04-12
  • 2023-02-24
  • 2012-08-27
  • 1970-01-01
  • 2018-01-19
  • 2018-02-28
  • 1970-01-01
相关资源
最近更新 更多