strstr是否有反向功能?

Man*_*rma 16 c c++ string

我试图找到一个类似的功能strstr,搜索从字符串的开头到字符串开头的子字符串.

小智 7

标准C库没有"反向strstr"功能,因此您必须自己查找或编写.

我想出了一些我自己的解决方案,并在此线程中添加了一些测试和基准测试代码以及​​其他功能.对于那些好奇的,在我的笔记本电脑上运行(Ubuntu karmic,amd64架构),输出看起来像这样:

$ gcc -O2 --std=c99 strrstr.c && ./a.out
#1 0.123 us last_strstr
#2 0.440 us theo
#3 0.460 us cordelia
#4 1.690 us digitalross
#5 7.700 us backwards_memcmp
#6 8.600 us sinan
Run Code Online (Sandbox Code Playgroud)

您的结果可能会有所不同,并且根据您的编译器和库,结果的顺序也可能不同.

要从字符串的开头获取匹配的偏移量(索引),请使用指针算术:

char *match = last_strstr(haystack, needle);
ptrdiff_t index;
if (match != NULL)
    index = match - haystack;
else
    index = -1;
Run Code Online (Sandbox Code Playgroud)

而现在,落叶松(请注意,这纯粹是在C中,我不太了解C++,无法给出答案):

#include <string.h>
#include <stdlib.h>

/* By liw. */
static char *last_strstr(const char *haystack, const char *needle)
{
    if (*needle == '\0')
        return (char *) haystack;

    char *result = NULL;
    for (;;) {
        char *p = strstr(haystack, needle);
        if (p == NULL)
            break;
        result = p;
        haystack = p + 1;
    }

    return result;
}


/* By liw. */
static char *backwards_memcmp(const char *haystack, const char *needle)
{
    size_t haylen = strlen(haystack);

    if (*needle == '\0')
        return (char *) haystack;

    size_t needlelen = strlen(needle);
    if (needlelen > haylen)
        return NULL;

    const char *p = haystack + haylen - needlelen;
    for (;;) {
        if (memcmp(p, needle, needlelen) == 0)
            return (char *) p;
        if (p == haystack)
            return NULL;
        --p;
    }
}


/* From http://stuff.mit.edu/afs/sipb/user/cordelia/Diplomacy/mapit/strrstr.c
 */
static char *cordelia(const char *s1, const char *s2)
{
 const char *sc1, *sc2, *psc1, *ps1;

 if (*s2 == '\0')
  return((char *)s1);

 ps1 = s1 + strlen(s1);

 while(ps1 != s1) {
  --ps1;
  for (psc1 = ps1, sc2 = s2; ; )
   if (*(psc1++) != *(sc2++))
    break;
   else if (*sc2 == '\0')
    return ((char *)ps1);
 }
 return ((char *)NULL);
}


/* From http://stackoverflow.com/questions/1634359/
   is-there-a-reverse-fn-for-strstr/1634398#1634398 (DigitalRoss). */
static char *reverse(const char *s)
{
  if (s == NULL)
    return NULL;
  size_t i, len = strlen(s);
  char *r = malloc(len + 1);

  for(i = 0; i < len; ++i)
    r[i] = s[len - i - 1];
  r[len] = 0;
  return r;
}
char *digitalross(const char *s1, const char *s2)
{
  size_t  s1len = strlen(s1);
  size_t  s2len = strlen(s2);
  const char *s;

  if (s2len == 0)
    return (char *) s1;
  if (s2len > s1len)
    return NULL;
  for (s = s1 + s1len - s2len; s >= s1; --s)
    if (strncmp(s, s2, s2len) == 0)
      return (char *) s;
  return NULL;
}


/* From http://stackoverflow.com/questions/1634359/
  is-there-a-reverse-fn-for-strstr/1634487#1634487 (Sinan Ünür). */

char *sinan(const char *source, const char *target)
{
    const char *current;
    const char *found = NULL;

    if (*target == '\0')
        return (char *) source;

    size_t target_length = strlen(target);
    current = source + strlen(source) - target_length;

    while ( current >= source ) {
        if ( (found = strstr(current, target)) ) {
            break;
        }
        current -= 1;
    }

    return (char *) found;
}


/* From http://stackoverflow.com/questions/1634359/
  is-there-a-reverse-fn-for-strstr/1634441#1634441 (Theo Spears). */
char *theo(const char* haystack, const char* needle)
{
  size_t needle_length = strlen(needle);
  const char* haystack_end = haystack + strlen(haystack) - needle_length;
  const char* p;
  size_t i;

  if (*needle == '\0')
    return (char *) haystack;
  for(p = haystack_end; p >= haystack; --p)
  {
    for(i = 0; i < needle_length; ++i) {
      if(p[i] != needle[i])
        goto next;
    }
    return (char *) p;

    next:;
  }
  return 0;
}


/*
 * The rest of this code is a test and timing harness for the various
 * implementations above.
 */


#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>


/* Check that the given function works. */
static bool works(const char *name, char *(*func)(const char *, const char *))
{
    struct {
        const char *haystack;
        const char *needle;
        int offset;
    } tests[] = {
        { "", "", 0 },
        { "", "x", -1 },
        { "x", "", 0 },
        { "x", "x", 0 },
        { "xy", "x", 0 },
        { "xy", "y", 1 },
        { "xyx", "x", 2 },
        { "xyx", "y", 1 },
        { "xyx", "z", -1 },
        { "xyx", "", 0 },
    };
    const int num_tests = sizeof(tests) / sizeof(tests[0]);
    bool ok = true;

    for (int i = 0; i < num_tests; ++i) {
        int offset;
        char *p = func(tests[i].haystack, tests[i].needle);
        if (p == NULL)
            offset = -1;
        else
            offset = p - tests[i].haystack;
        if (offset != tests[i].offset) {
            fprintf(stderr, "FAIL %s, test %d: returned %d, haystack = '%s', "
                            "needle = '%s', correct return %d\n",
                            name, i, offset, tests[i].haystack, tests[i].needle,
                            tests[i].offset);
            ok = false;
        }
    }
    return ok;
}


/* Dummy function for calibrating the measurement loop. */
static char *dummy(const char *haystack, const char *needle)
{
    return NULL;
}


/* Measure how long it will take to call the given function with the
   given arguments the given number of times. Return clock ticks. */
static clock_t repeat(char *(*func)(const char *, const char *),
                       const char *haystack, const char *needle,
                       long num_times)
{
    clock_t start, end;

    start = clock();
    for (long i = 0; i < num_times; ++i) {
        func(haystack, needle);
    }
    end = clock();
    return end - start;
}


static clock_t min(clock_t a, clock_t b)
{
    if (a < b)
        return a;
    else
        return b;
}


/* Measure the time to execute one call of a function, and return the
   number of CPU clock ticks (see clock(3)). */
static double timeit(char *(*func)(const char *, const char *))
{
    /* The arguments for the functions to be measured. We deliberately
       choose a case where the haystack is large and the needle is in
       the middle, rather than at either end. Obviously, any test data
       will favor some implementations over others. This is the weakest
       part of the benchmark. */

    const char haystack[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "b"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const char needle[] = "b";

    /* First we find out how many repeats we need to do to get a sufficiently
       long measurement time. These functions are so fast that measuring
       only a small number of repeats will give wrong results. However,
       we don't want to do a ridiculously long measurement, either, so 
       start with one repeat and multiply it by 10 until the total time is
       about 0.2 seconds. 

       Finally, we measure the dummy function the same number of times
       to get rid of the call overhead.

       */

    clock_t mintime = 0.2 * CLOCKS_PER_SEC;
    clock_t clocks;
    long repeats = 1;
    for (;;) {
        clocks = repeat(func, haystack, needle, repeats);
        if (clocks >= mintime)
            break;
        repeats *= 10;
    }

    clocks = min(clocks, repeat(func, haystack, needle, repeats));
    clocks = min(clocks, repeat(func, haystack, needle, repeats));

    clock_t dummy_clocks;

    dummy_clocks = repeat(dummy, haystack, needle, repeats);
    dummy_clocks = min(dummy_clocks, repeat(dummy, haystack, needle, repeats));
    dummy_clocks = min(dummy_clocks, repeat(dummy, haystack, needle, repeats));

    return (double) (clocks - dummy_clocks) / repeats / CLOCKS_PER_SEC;
}


/* Array of all functions. */
struct func {
    const char *name;
    char *(*func)(const char *, const char *);
    double secs;
} funcs[] = {
#define X(func) { #func, func, 0 }
    X(last_strstr),
    X(backwards_memcmp),
    X(cordelia),
    X(digitalross),
    X(sinan),
    X(theo),
#undef X
};
const int num_funcs = sizeof(funcs) / sizeof(funcs[0]);


/* Comparison function for qsort, comparing timings. */
int funcmp(const void *a, const void *b)
{
    const struct func *aa = a;
    const struct func *bb = b;

    if (aa->secs < bb->secs)
        return -1;
    else if (aa->secs > bb->secs)
        return 1;
    else
        return 0;
}


int main(void)
{

    bool ok = true;
    for (int i = 0; i < num_funcs; ++i) {
        if (!works(funcs[i].name, funcs[i].func)) {
            fprintf(stderr, "%s does not work\n", funcs[i].name);            
            ok = false;
        }
    }
    if (!ok)
        return EXIT_FAILURE;

    for (int i = 0; i < num_funcs; ++i)
        funcs[i].secs = timeit(funcs[i].func);
    qsort(funcs, num_funcs, sizeof(funcs[0]), funcmp);
    for (int i = 0; i < num_funcs; ++i)
        printf("#%d %.3f us %s\n", i+1, funcs[i].secs * 1e6, funcs[i].name);

    return 0;
}
Run Code Online (Sandbox Code Playgroud)


Dig*_*oss 5

我不知道一个.关于C的一个好处是,如果你编写自己的函数,它就像库函数一样快速有效.(在许多其他语言中情况并非如此.)

您可以反转字符串和子字符串,然后搜索.

最后,当字符串库不够好时,人们经常做的另一件事就是转移到正则表达式.

好吧,我写了两reverse()rstrstr(),如果我们幸运的话可能工作.摆脱__restrictC++.您也可能想要创建参数const,但是您需要转换返回值.要回答您的评论问题,您可以通过从中减去原始字符串指针来从子字符串的地址获取索引.好:

#include <stdlib.h>
#include <string.h>

char *reverse(const char * __restrict const s)
{
  if (s == NULL)
    return NULL;
  size_t i, len = strlen(s);
  char *r = malloc(len + 1);

  for(i = 0; i < len; ++i)
    r[i] = s[len - i - 1];
  r[len] = 0;
  return r;
}

char *rstrstr(char *__restrict s1, char *__restrict s2)
{
  size_t  s1len = strlen(s1);
  size_t  s2len = strlen(s2);
  char *s;

  if (s2len > s1len)
    return NULL;
  for (s = s1 + s1len - s2len; s >= s1; --s)
    if (strncmp(s, s2, s2len) == 0)
      return s;
  return NULL;
}
Run Code Online (Sandbox Code Playgroud)

  • 显然,我的陈述暗示"除非你破坏实现",但我们不能在不包括该假设的情况下对任何事情作出任何陈述.我的意思是,*认真*现在. (6认同)
  • 你们总是缺少整体观点,即你的例程是用C语言编写的,库例程是用C语言编写的,每个人都在一个公平的竞争环境中.让我们看看有人在Perl,Python,Ruby甚至Java中实现strcmp.这是,咳咳,*明显*,点. (6认同)
  • 库例程不一定用C语言编写.当我在标准库上工作时,我通常都在编写程序集.您可以在几乎任何编译语言中编写C库函数,该语言了解目标平台上的C调用约定.(那就是说,我同意所有这一切都不重要). (2认同)
  • 标准库几乎肯定不使用 C;它使用汇编或具有编译器特定内在函数的 C。基本上不可能使 strrstr 与标准库 strstr 一样快。 (2认同)

hha*_*fez 0

我不相信 C 字符串库中有这个库,但是编写自己的字符串库是很简单的,在一个条件下,您知道字符串的长度或者它已正确终止。