一种可能的算法,用于确定两个字符串是否是彼此的字谜？

Question

一种可能的算法,用于确定两个字符串是否是彼此的字谜？

Ale*_*ser 8 c string algorithm math anagram

我有这个想法(使用C语言)来检查由ASCII字母组成的两个字符串是否是彼此的字谜:

检查字符串是否长度相同.
检查两个字符串的所有字符的ASCII值之和是否相同.
检查两个字符串的所有字符的ASCII值的乘积是否相同.

我相信如果所有三个都是正确的,那么字符串必须是彼此的字谜.但是,我无法证明这一点.有人可以帮助我证明或反驳这会起作用吗？

谢谢!

Answer 1

tem*_*def 11

我写了一个快速程序,强力搜索的冲突,并发现,这种方法并不会总是奏效.字符串ABFN和AAHM具有相同的ASCII和和产品,但不是彼此的字母.它们的ASCII总和为279,ASCII产品为23,423,400.

有比这更多的冲突.我的程序搜索了所有长度为四个字符串,发现了11,737个冲突.

供参考,这是C++源代码:

#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;

int main() {
  /* Sparse 2D table where used[sum][prod] is either nothing or is a string
   * whose characters sum to "sum" and whose product is "prod".
   */
  map<int, map<int, string> > used;

  /* List of all usable characters in the string. */
  vector<char> usable;
  for (char ch = 'A'; ch <= 'Z'; ch++) {
    usable.push_back(ch);
  }
  for (char ch = 'a'; ch <= 'z'; ch++) {
    usable.push_back(ch);
  }

  /* Brute-force search over all possible length-four strings.  To avoid
   * iterating over anagrams, the search only explores strings whose letters
   * are in increasing ASCII order.
   */
  for (int a = 0; a < usable.size(); a++) {
    for (int b = a; b < usable.size(); b++) {
      for (int c = b; c < usable.size(); c++) {
        for (int d = c; d < usable.size(); d++) {
          /* Compute the sum and product. */
          int sum  = usable[a] + usable[b] + usable[c] + usable[d];
          int prod = usable[a] * usable[b] * usable[c] * usable[d];

          /* See if we have already seen this. */
          if (used.count(sum) &&
              used[sum].count(prod)) {
            cout << "Conflict found: " << usable[a] << usable[b] << usable[c] << usable[d] << " conflicts with " << used[sum][prod] << endl;
          }

          /* Update the table. */
          used[sum][prod] = string() + usable[a] + usable[b] + usable[c] + usable[d];
        }
      }
    }
  }
}

Run Code Online (Sandbox Code Playgroud)

希望这可以帮助!

Answer 2

G. *_*ach 5

你的做法是错误的; 我无法解释为什么因为我不理解它,但至少对于基数3有不同的集合,它们具有相同的总和和产品:https://math.stackexchange.com/questions/38671/two-sets-的-3-正整数与-相等-相加和产品

Answer 3

wil*_*ser 5

字母az和AZ用于索引26个素数的数组,这些素数的乘积用作单词的哈希值.同等产品< - >相同的字母.

(下面片段中primes26 []数组中散列值的顺序是基于荷兰语中的字母频率,因为尝试最大限度地模拟了预期的产品)

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define COUNTOF(a) (sizeof (a)/ sizeof (a)[0])

typedef unsigned long long HashVal;
HashVal hashmem (char *str, size_t len);

unsigned char primes26[] =
{
5,71,79,19,2,83,31,43,11,53,37,23,41,3,13,73,101,17,29,7,59,47,61,97,89,67,
};

struct anahash {
        struct anahash *next;
        unsigned freq;
        HashVal hash;
        char word[1];
        };

struct anahash *hashtab[1024*1024] = {NULL,};
struct anahash *new_word(char *str, size_t len);
struct anahash **hash_find(struct anahash *wp);

/*********************************************/

HashVal hashmem (char *str, size_t len)
{
size_t idx;
HashVal val=1;

if (!len) return 0;
for (idx = 0; idx < len; idx++) {
        char ch = str[idx];
        if (ch >= 'A' && ch <= 'Z' ) val *= primes26[ ch - 'A'];
        else if (ch >= 'a' && ch <= 'z' ) val *= primes26[ ch - 'a'];
        else continue;
        }
return val;
}

struct anahash *new_word(char *str, size_t len)
{
struct anahash *wp;
if (!len) len = strlen(str);

wp = malloc(len + sizeof *wp );
wp->hash = hashmem(str, len);
wp->next = NULL;
wp->freq = 0;
memcpy (wp->word, str, len);
wp->word[len] = 0;
return wp;
}

struct anahash **hash_find(struct anahash *wp)
{
unsigned slot;
struct anahash **pp;

slot = wp->hash % COUNTOF(hashtab);

for (pp = &hashtab[slot]; *pp; pp= &(*pp)->next) {
        if ((*pp)->hash < wp->hash) continue;
        if (strcmp( wp->word, (*pp)->word ) > 0) continue;
        break;
        }
return pp;
}

char buff [16*4096];
int main (void)
{
size_t pos,end;
struct anahash *wp, **pp;
HashVal val;

memset(hashtab, 0, sizeof hashtab);

while (fgets(buff, sizeof buff, stdin)) {
        for (pos=0; pos < sizeof buff && buff[pos]; ) {
                for(end = pos; end < sizeof buff && buff[end]; end++ ) {
                        if (buff[end] < 'A' || buff[end] > 'z') break;
                        if (buff[end] > 'Z' && buff[end] < 'a') break;
                        }
                if (end > pos) {
                        wp = new_word(buff+pos, end-pos);
                        if (!wp) {pos=end; continue; }
                        pp = hash_find(wp);
                        if (!*pp) *pp = wp;
                        else if ((*pp)->hash == wp->hash
                         && !strcmp((*pp)->word , wp->word)) free(wp);
                        else { wp->next = *pp; *pp = wp; }
                        (*pp)->freq +=1;
                        }
                pos = end;
                for(end = pos; end < sizeof buff && buff[end]; end++ ) {
                        if (buff[end] >= 'A' && buff[end] <= 'Z') break;
                        if (buff[end] >= 'z' && buff[end] <= 'a') break;
                        }
                pos = end;
                }
        }
for (pos = 0;  pos < COUNTOF(hashtab); pos++) {
        if (! &hashtab[pos] ) continue;

        for (pp = &hashtab[pos]; wp = *pp; pp = &wp->next) {
                if (val != wp->hash) {
                        fprintf (stdout, "\nSlot:%u:\n", pos );
                        val = wp->hash;
                        }
                fprintf (stdout, "\t%llx:%u:%s\n", wp->hash, wp->freq, wp->word);
                }
        }

return 0;
}

Run Code Online (Sandbox Code Playgroud)

归档时间：	12 年，10 月前
查看次数：	5091 次
最近记录：	7 年，4 月前