您的代码存在不止一个基本问题,唉。
- 您试图返回一个 VLA(变长数组)。这是行不通的;不要这样做。
- 您的分隔符字符串没有以空字符('\0')结尾。
- 您的函数无法自行确定标记(token)的数量。
但是,我认为这是一个有趣的编程练习,于是整理出了一个通用的解决方案。下面是带有文档的头文件,以及完全可选的默认参数宏魔法(感谢 Braden Steffaniak 出色的宏技巧):
split.h
// Copyright 2021 Michael Thomas Greer.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// https://www.boost.org/LICENSE_1_0.txt )
/*
char **
split(
const char * s,
const char * sep = NULL, // --> whitespace: " \f\n\r\v\t"
bool is_dup_s = true, // --> non-destructive of source?
int granularity = 0 // --> default granularity
);
Function:
Split a string into tokens, much like strtok(). Tokens are delimited
by the argument separator characters. Empty tokens are not returned.
Returns:
• a NULL-terminated array of pointers to the tokens in s.
You must free() the resulting array. Do NOT free individual tokens!
• NULL on failure (due to a memory re/allocation failure).
Arguments:
s • The source string to tokenize.
sep • Separator characters. Defaults to all whitespace.
is_dup_s • By default the source string is duplicated so that
the tokenization can be done non-destructively (for
example, on literals). If you don't care about the
source, or the source is sufficiently large that
duplication could be a problem, then turn this off.
granularity • The algorithm works by building a table of token
indices. This is the growth size of that table.
It defaults to a reasonably small size. But if you
have a good idea of the number of tokens you will
typically generate, set it to that.
Uses totally-optional macro magic for elided default arguments.
No macros == no elided default argument magic. (You can still specify
default values for arguments, though.)
*/
#ifndef DUTHOMHAS_SPLIT_H
#define DUTHOMHAS_SPLIT_H

#include <stdbool.h>
#include <stddef.h>  // NULL -- SPLIT1 below expands to it, so the header must provide it itself

// The real function. All four arguments are required when it is called
// directly, e.g. as (split)( s, sep, is_dup_s, granularity ).
char ** split( const char * s, const char * sep, bool is_dup_s, int granularity );

// ---------------------------------------------------------------------------
// Optional default-argument dispatch, adapted from
// https://stackoverflow.com/a/24028231/2706707
//
// split(...) counts its arguments (1 to 4 are supported) and forwards to the
// matching SPLITn macro, which fills in the missing trailing arguments.
// ---------------------------------------------------------------------------

// Juxtaposes a macro name and a parenthesized argument list so the pair is
// rescanned as a macro invocation.
// NOTE(review): presumably a workaround for preprocessors that pass
// __VA_ARGS__ through as a single argument -- see the linked answer.
#define SPLIT_GLUE(x, y) x y

// Selects the fifth argument. SPLIT_COUNT_ARGS_MAX5 appends "4, 3, 2, 1, 0"
// to the caller's arguments, so the fifth one is the caller's argument count.
#define SPLIT_RETURN_ARG_COUNT(_1_, _2_, _3_, _4_, count, ...) count
#define SPLIT_EXPAND_ARGS(args) SPLIT_RETURN_ARG_COUNT args
#define SPLIT_COUNT_ARGS_MAX5(...) SPLIT_EXPAND_ARGS((__VA_ARGS__, 4, 3, 2, 1, 0))

// Pastes name and count together (SPLIT + 2 --> SPLIT2). The extra levels of
// indirection make sure count is fully expanded before the ## paste happens.
#define SPLIT_OVERLOAD_MACRO2(name, count) name##count
#define SPLIT_OVERLOAD_MACRO1(name, count) SPLIT_OVERLOAD_MACRO2(name, count)
#define SPLIT_OVERLOAD_MACRO(name, count) SPLIT_OVERLOAD_MACRO1(name, count)

// Builds "<name><argument count>( arguments )".
#define SPLIT_CALL_OVERLOAD(name, ...) SPLIT_GLUE(SPLIT_OVERLOAD_MACRO(name, SPLIT_COUNT_ARGS_MAX5(__VA_ARGS__)), (__VA_ARGS__))

// Public entry point: shadows the function name with the dispatching macro.
#define split(...) SPLIT_CALL_OVERLOAD( SPLIT, __VA_ARGS__ )

// One macro per argument count. The parentheses around (split) suppress
// function-like macro expansion, so these call the real function.
// Defaults: sep = NULL (whitespace), is_dup_s = true, granularity = 0.
#define SPLIT1(s) (split)( s, NULL, true, 0 )
#define SPLIT2(s,sep) (split)( s, sep, true, 0 )
#define SPLIT3(s,sep,ids) (split)( s, sep, ids, 0 )
#define SPLIT4(s,sep,ids,g) (split)( s, sep, ids, g )

#endif
下面是最重要的部分:
split.c
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
/*
  Split s into tokens delimited by any of the characters in sep.
  Empty tokens are never produced.

  Arguments:
    s           The string to tokenize. Must not be NULL.
    sep         Separator characters; NULL selects " \f\n\r\v\t".
    is_dup_s    true:  s is copied into the result block and left untouched.
                false: s itself is modified (a '\0' is written at the end of
                       every token), so it must be writable, and the returned
                       pointers point into s.
    granularity Initial size AND growth step of the temporary token table.
                Values <= 0 select the default of 32.

  Returns:
    A NULL-terminated array of token pointers in a single malloc()ed block;
    release it with one free(). Returns NULL if an allocation fails.
*/
char ** split( const char * s, const char * sep, bool is_dup_s, int granularity )
{
  // Each slot records one token: [0] = index of its first character,
  // [1] = index one past its last character.
  typedef size_t slot[ 2 ];

  // Largest slot count whose byte size still fits in a size_t.
  const size_t slot_limit = (size_t)-1 / sizeof(slot);

  // The effective step is used for both the initial size and for growth.
  // Growing by the raw granularity argument would grow by zero (or shrink)
  // whenever the default was requested, overflowing the table on token 33.
  const size_t growth = (granularity > 0) ? (size_t)granularity : 32;

  size_t max_slots = growth;
  size_t num_slots = 0;
  size_t index = 0;
  slot * slots;
  char ** result;

  if (growth > slot_limit) return NULL;

  slots = malloc( sizeof(slot) * max_slots );
  if (!slots) return NULL;

  if (!sep) sep = " \f\n\r\v\t";

  // Pass 1: record where every token begins and ends
  while (s[ index ])
  {
    index += strspn( s + index, sep );  // skip leading separators
    if (!s[ index ]) break;             // only separators were left

    if (num_slots == max_slots)         // table full --> grow it
    {
      slot * new_slots;
      if (growth > slot_limit - max_slots) { free( slots ); return NULL; }
      new_slots = realloc( slots, sizeof(slot) * (max_slots + growth) );
      if (!new_slots) { free( slots ); return NULL; }
      slots = new_slots;
      max_slots += growth;
    }

    slots[ num_slots ][ 0 ] = index;    // beginning of token
    index += strcspn( s + index, sep ); // skip non-separators
    slots[ num_slots ][ 1 ] = index;    // end of token
    num_slots += 1;
  }
  // The loop only exits on the terminating '\0', so index == strlen( s ) here.

  // Pass 2: one block holding the pointer array (plus its NULL terminator),
  // optionally followed by a private copy of the source string.
  result = malloc( sizeof(char *) * (num_slots + 1) + (is_dup_s ? index + 1 : 0) );
  if (result)
  {
    // d is the text the token pointers refer to: either the copy stored
    // right after the pointer array, or the caller's own buffer.
    char * d = is_dup_s ? (char *)(&result[ num_slots + 1 ]) : (char *)s;
    size_t n;

    if (is_dup_s) memcpy( d, s, index + 1 );

    for (n = 0; n < num_slots; n++)
    {
      result[ n ] = d + slots[ n ][ 0 ];
      d[ slots[ n ][ 1 ] ] = '\0';      // terminate the token, like strtok()
    }
    result[ num_slots ] = NULL;
  }

  free( slots );
  return result;
}
下面是一些使用它的示例代码:
a.c
#include <stdio.h>
#include <stdlib.h>
#include "split.h"
/*
  Print the label s, then every token in ss on its own numbered line,
  then release ss.

  s   Label printed before the tokens.
  ss  NULL-terminated token array; this function free()s it.
      May be NULL, in which case a failure note is printed instead of
      dereferencing the pointer.
*/
void test( const char * s, char ** ss )
{
  printf( "%s\n", s );
  if (!ss)
  {
    printf( " (no result: allocation failed)\n\n" );
    return;
  }
  for (int n = 0; ss[n]; ++n)
    printf( " %d: \"%s\"\n", n, ss[n] );
  free( ss );
  printf( "\n" );
}
// Runs one case: #x turns the expression's source text into the label that
// test() prints, and x itself is evaluated to produce the token array.
#define TEST(x) test( #x , x )
// Exercises split() with one to four arguments on a range of inputs.
int main()
{
// One argument: separators default to whitespace
TEST( split( "Hello world! \n" ) );
// Two arguments: explicit separator set
TEST( split( " 2, 3, 5, 7, 11, ", /*sep*/", " ) );
// Input made only of separators, and empty input
TEST( split( "::::", ":" ) );
TEST( split( "", ":" ) );
// All four arguments, with NULL selecting the default separators
TEST( split( "", NULL, true, 15 ) );
TEST( split( "a b c d e", NULL ) );
// Granularity of 1
TEST( split( " - a---b c - d - ", " -", true, 1 ) );
// is_dup_s == false: s is tokenized in place, so it must be a writable
// array (not a literal), and it is modified by the call.
char s[] = "Never trust a computer you can't throw out a window. --Abraham Lincoln";
printf( "s = \"%s\"\n", s );
TEST( split( s, " -.", false ) );
printf( "Modified s will print only the first token: \"%s\"\n", s );
}
已在 Windows 10 上使用以下编译器测试:
- MSVC 2019 (19.21.27702.2)
cl /EHsc /W4 /Ox a.c split.c
- LLVM/Clang 9.0.0
clang -Wall -Wextra -pedantic-errors -O3 -o a.exe a.c split.c
以及在 Ubuntu 20.04 上使用以下编译器测试:
- GCC 9.3.0
gcc -Wall -Wextra -pedantic-errors -O3 a.c split.c
- Clang 10.0.0
clang -Wall -Wextra -pedantic-errors -O3 a.c split.c
解释一下这种疯狂!
我知道您是初学者,这比您预期的要多得多。不用担心,使用字符串和动态分配的内存实际上是相当困难的。很多人总是搞错。
这里使用的技巧是使用strspn() 和strcspn() 库函数在字符串中为每个标记的开头和结尾构建一个临时索引列表——与strtok() 内部使用的函数完全相同。该列表可以根据需要动态增长。
一旦该列表完成,我们就分配足够的内存来存储指向每个标记的指针,外加 1 个指针(用于数组末尾的 NULL 指针),其后可选地跟随源字符串的副本。
然后我们简单地计算在字符串中索引的标记的指针值(地址),修改字符串就像strtok() 所做的那样以空终止每个标记。
结果是单个内存块,因此当用户完成对数组的迭代时,它可以直接传递给free()。示例测试函数使用整数索引对数组进行迭代,但字符串迭代器(指向 char 指针的指针)也可以:
// Walks the token array with a pointer-to-pointer instead of an integer
// index; the array's final entry is NULL, which ends the loop.
// NOTE(review): split() is documented to return NULL on allocation failure;
// real code should check tokens before dereferencing it.
char ** tokens = split( my_string, my_delimiters ); // Get tokens
for (char ** ptoken = tokens; *ptoken; ++ptoken) // For each token (stop at the NULL entry)
printf( " %s\n", *ptoken ); // (do something with it)
free( tokens ); // One free() releases the whole block
就是这样!