【问题标题】:Inserting a character in a string when there isn't a match in C(在 C 中当字符不匹配时向字符串中插入一个字符)
【发布时间】:2015-02-20 02:35:29
【问题描述】:

如果该字符不匹配,我正在尝试找出一种将字符插入字符串的方法。

假设我有这两个字符串:

s1: CGGGTATCCAA
s2: CCCTAGGTCCCA

它应该输出这个:

s1: C----GGGTATCC-AA
s2: CCCTAGG-T--CCCA-

算法如下:

if(lengthOfs1 > lengthOfs2)
    if character mismatch
       put a dash on s2
    else
       put the original character
else if(lengthOfs1 <= lengthOfs2)
    if character mismatch
       put a dash on s1
    else
       put the original character

我的做法是保留两个原始字符串,用 for 循环逐个字符遍历,直到在字符串中遇到 '\0' 为止。在循环中进行比较,最后使用类似下面的代码:

strncpy(&s1_final_string[i + 1], &s1[i], 1) // if they are equal
strncpy(&s1_final_string[i], "-", 1); // if I need to put a dash

如果我们有不匹配的情况,是否有一种简单的方法来处理这种情况并复制“-”字符?

【问题讨论】:

  • 您似乎已经回答了自己的问题?你的算法看起来基本没问题。如果您将 s1_final_string 和 s2_final_string 的大小分配为 strlen(s1) + strlen(s2),那么您需要做的就是在迭代实现算法时跟踪您的 s1_index、s2_index、s1_final_index 和 s2_final_index。此外,无需使用 strncpy 而是直接设置字符。例如,s1_final_string[s1_final_index++] = s1_string[s1_index++];
  • 仔细想想,我认为这个问题比你想象的要困难得多。我认为您真正想要的是一种差异(diff)算法:它找出两个字符串之间的最小差异(或最大匹配),然后用破折号填充不匹配的位置。它类似于 diff 程序所使用的算法,只不过是逐个字符而不是逐行进行比较。
  • 你可以做一些更简单的事情,比如贪心算法。例如,在每个不匹配点处,在两个字符串中分别向前搜索下一个匹配字符,并且只向能以较少破折号消除该不匹配的那个字符串添加破折号。这可能会在该输入上为您提供所需的输出,但在其他情况下可能会产生次优结果。

标签: c string algorithm


【解决方案1】:

这是一个匹配您的输入和输出的贪心差分算法。请注意,此算法不会在任何两个字符串之间找到最小的不匹配。相反,在每个不匹配点,它会在两个字符串中向前扫描以找到下一个匹配点,并使用更接近的匹配点。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Greedily align two strings character by character, padding with '-'.
 *
 * Both strings are walked in parallel.  Equal characters are copied to both
 * outputs.  At a mismatch, each string is scanned forward for the other
 * string's current character, and dashes are inserted into whichever string
 * needs fewer of them to reach that next match.  A scan that finds no match
 * stops at the terminating '\0', so "no match" costs the remaining length of
 * the scanned string.  Once one input is exhausted, the rest of the other is
 * copied and paired with dashes.
 *
 * This is a greedy heuristic; it does not compute a minimal diff.
 *
 * Parameters:
 *   s1, s2        NUL-terminated input strings (not modified).
 *   s1_final_ptr  receives a newly allocated, NUL-terminated padded s1.
 *   s2_final_ptr  receives a newly allocated, NUL-terminated padded s2.
 *
 * Returns 0 on success; the two outputs then have equal length and the
 * caller owns both buffers and must free() them.  Returns -1 if any argument
 * is NULL, if the required buffer size would overflow size_t, or if
 * allocation fails; in that case every non-NULL output pointer is set to
 * NULL and nothing needs to be freed.
 */
int greedy_diff_str(const char *s1, const char *s2, char **s1_final_ptr, char **s2_final_ptr)
{
  size_t s1_len, s2_len, out_size;
  size_t s1_index = 0, s2_index = 0, final_index = 0;
  char  *s1_final, *s2_final;

  /* never leave the outputs indeterminate, whichever way we return */

  if (NULL != s1_final_ptr)
  {
    *s1_final_ptr = NULL;
  }

  if (NULL != s2_final_ptr)
  {
    *s2_final_ptr = NULL;
  }

  if (NULL == s1 || NULL == s2 || NULL == s1_final_ptr || NULL == s2_final_ptr)
  {
    return -1;
  }

  s1_len = strlen(s1);
  s2_len = strlen(s2);

  /* every output column consumes at least one input character, so
     s1_len + s2_len characters plus the terminator always suffices;
     (size_t) -1 is the largest size_t value, used to reject wrap-around */

  if (s1_len >= (size_t) -1 - s2_len)
  {
    return -1;
  }

  out_size = s1_len + s2_len + 1;

  if (NULL == (s1_final = calloc(out_size, 1)))
  {
    return -1;
  }

  if (NULL == (s2_final = calloc(out_size, 1)))
  {
    free(s1_final);
    return -1;
  }

  while ('\0' != s1[s1_index] && '\0' != s2[s2_index])
  {
    if (s1[s1_index] == s2[s2_index])
    {
      s1_final[final_index]   = s1[s1_index++];
      s2_final[final_index++] = s2[s2_index++];
    }
    else
    {
      size_t s1_dashes, s2_dashes, i;

      /* count how many dashes we'd have to add to s1 to reach next match point with s2 */

      for (i = s2_index + 1; '\0' != s2[i] && s1[s1_index] != s2[i]; ++i)
      {
      }
      s1_dashes = i - s2_index;

      /* count how many dashes we'd have to add to s2 to reach next match point with s1 */

      for (i = s1_index + 1; '\0' != s1[i] && s2[s2_index] != s1[i]; ++i)
      {
      }
      s2_dashes = i - s1_index;

      /* pick whichever path results in less dashes; break ties by adding dashes to string from which we've consumed more */

      if (s1_dashes < s2_dashes || (s1_dashes == s2_dashes && s1_index >= s2_index))
      {
        while (s1_dashes--)
        {
          s1_final[final_index]   = '-';
          s2_final[final_index++] = s2[s2_index++];
        }
      }
      else
      {
        while (s2_dashes--)
        {
          s1_final[final_index]   = s1[s1_index++];
          s2_final[final_index++] = '-';
        }
      }
    }
  }

  /* at most one of the two tail loops below runs: pad out the shorter side */

  for (; '\0' != s1[s1_index]; ++s1_index, ++final_index)
  {
    s1_final[final_index] = s1[s1_index];
    s2_final[final_index] = '-';
  }

  for (; '\0' != s2[s2_index]; ++s2_index, ++final_index)
  {
    s1_final[final_index] = '-';
    s2_final[final_index] = s2[s2_index];
  }

  s1_final[final_index] = '\0';
  s2_final[final_index] = '\0';

  *s1_final_ptr = s1_final;
  *s2_final_ptr = s2_final;

  return 0;
}

/*
 * Demo driver: aligns the two sample sequences from the question and prints
 * the inputs followed by the padded outputs.
 */
int main(void)
{
  char s1[] = "CGGGTATCCAA", s2[] = "CCCTAGGTCCCA", *s1_fin, *s2_fin;

  printf("Input:\n");
  printf("s1: '%s'\n", s1);
  printf("s2: '%s'\n", s2);

  /* on failure the output pointers are NULL and must not be passed to printf("%s") */

  if (0 != greedy_diff_str(s1, s2, &s1_fin, &s2_fin))
  {
    fprintf(stderr, "greedy_diff_str failed\n");
    return EXIT_FAILURE;
  }

  printf("Output:\n");
  printf("s1: '%s'\n", s1_fin);
  printf("s2: '%s'\n", s2_fin);

  /* the result buffers are owned by the caller */

  free(s1_fin);
  free(s2_fin);

  return 0;
}

这是运行的输出:

john-schultzs-macbook-pro:~ jschultz$ ./a.out
Input:
s1: 'CGGGTATCCAA'
s2: 'CCCTAGGTCCCA'
Output:
s1: 'C----GGGTATCC-AA'
s2: 'CCCTAGG-T--CCCA-'

【讨论】:

    猜你喜欢
    • 2021-11-13
    • 2022-01-08
    • 1970-01-01
    • 2017-08-09
    • 1970-01-01
    • 1970-01-01
    • 2023-03-08
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多