查找字符串中搜索词的所有索引

Bad*_*Cat 4 string swift

我需要一种快速方法来查找字符串中可能出现的搜索词的所有索引。我尝试了这种“暴力”String扩展方法:

// Note: makes use of ExSwift
extension String
{
    var length: Int { return count(self) }

    func indicesOf(searchTerm:String) -> [Int] {
        var indices = [Int]()
        for i in 0 ..< self.length {
            let segment = self[i ... (i + searchTerm.length - 1)]
            if (segment == searchTerm) {
                indices.append(i)
            }
        }
        return indices;
    }
}
Run Code Online (Sandbox Code Playgroud)

...但是速度慢得离谱,尤其是搜索词越短。快速找到所有索引的更好方法是什么?

Vic*_*ler 5

正如 Martin 所说,您可以在字符串匹配中实现一些众所周知的最快算法,Knuth\xe2\x80\x93Morris\xe2\x80\x93Pratt字符串搜索算法(或KMP算法)搜索“单词”的出现W算法)在主字符串中 “文本字符串” S

\n\n

该算法的复杂度为O(n),其中n是 的长度SO大 O 表示法

\n\n
extension String {\n\n    // Build pi function of prefixes\n    private func build_pi(str: String) -> [Int] {\n\n       var n = count(str)\n       var pi = Array(count: n + 1, repeatedValue: 0)\n       var k = -1\n       pi[0] = -1\n\n       for (var i = 0; i < n; ++i) {\n           while (k >= 0 && str[k] != str[i]) {\n              k = pi[k]\n           }\n           pi[i + 1] = ++k\n       }\n\n       return pi\n    }\n\n    // Knuth-Morris Pratt algorithm\n    func searchPattern(pattern: String) -> [Int] {\n\n       var matches = [Int]()\n       var n = count(self)\n\n       var m = count(pattern)\n       var k = 0\n       var pi = build_pi(pattern)\n\n       for var i = 0; i < n; ++i {\n           while (k >= 0 && (k == m || pattern[k] != self[i])) {\n              k = pi[k]\n           }\n           if ++k == m {\n              matches.append(i - m + 1)\n           }\n       }\n\n       return matches\n    }\n\n    subscript (i: Int) -> Character {\n        return self[advance(self.startIndex, i)]\n    }\n}\n
Run Code Online (Sandbox Code Playgroud)\n\n

然后您可以通过以下方式使用它:

\n\n
var string = "apurba mandal loves ayoshi loves"\nvar pattern = "loves"\n\nprintln(string.searchPattern(pattern))\n
Run Code Online (Sandbox Code Playgroud)\n\n

输出应该是:

\n\n
[14, 27]\n
Run Code Online (Sandbox Code Playgroud)\n\n

它属于字符串内模式出现的起始索引。我希望这对你有帮助。

\n\n
\n

编辑:

\n
\n\n

正如 Martin 在他的评论中所说,您需要避免使用通过 anadvance索引的函数,因为它的O(索引位置)StringInt

\n\n

一种可能的解决方案是将 转换String为数组Character,然后访问索引的时间复杂度为O(1)

\n\n

然后extension可以改成这样:

\n\n
extension String {\n\n   // Build pi function of prefixes\n   private func build_pi(str: [Character]) -> [Int] {\n\n      var n = count(str)\n      var pi = Array(count: n + 1, repeatedValue: 0)\n      var k = -1\n      pi[0] = -1\n\n      for (var i = 0; i < n; ++i) {\n          while (k >= 0 && str[k] != str[i]) {\n              k = pi[k]\n          }\n          pi[i + 1] = ++k\n      }\n\n      return pi\n   }\n\n   // Knuth-Morris Pratt algorithm\n   func searchPattern(pattern: String) -> [Int] {\n\n      // Convert to Character array to index in O(1)\n      var patt = Array(pattern)\n      var S = Array(self)\n\n      var matches = [Int]()\n      var n = count(self)\n\n      var m = count(pattern)\n      var k = 0\n      var pi = build_pi(patt)\n\n      for var i = 0; i < n; ++i {\n         while (k >= 0 && (k == m || patt[k] != S[i])) {\n             k = pi[k]\n         }\n         if ++k == m {\n             matches.append(i - m + 1)\n         }\n      }\n\n      return matches\n   }\n}\n
Run Code Online (Sandbox Code Playgroud)\n