BSA*_*BSA 5 javascript regex string-comparison
我有一个很长的字符串(约 1000 个单词),想把它与一个数组中的所有元素逐一比较(数组元素同样是很长的字符串),找出所有由 3 个或更多连续单词组成的匹配片段。我用正则表达式实现了这一功能,但得到的匹配数组都是空的。
较小文本的示例:
// Sample input: the reference text, followed by the candidate texts (each
// tagged with an id) that are searched for phrases shared with it.
const textToCompare = "Hello there how are you doing with your life";

const textsToCompareWith = [
  {
    id: 1,
    text: "Hope you are doing good with your life",
  },
  {
    id: 2,
    text: "what are you doing with your life. hello there how are you",
  },
  {
    id: 3,
    text: "hello there mate",
  },
];
Run Code Online (Sandbox Code Playgroud)
预期输出:
[
{id:1, matchedText:["with your life"]},
{id:2, matchedText:["are you doing with your life","hello there how are you"]},
{id:3, matchedText:[]}
];
Run Code Online (Sandbox Code Playgroud)
当前输出:
[
{id:1, matchedText:[]},
{id:2, matchedText:[]},
{id:3, matchedText:[]}
];
Run Code Online (Sandbox Code Playgroud)
我的代码:
// Builds ONE pattern out of every word of textToCompare, in their original order:
//   \bHello\b.*\bthere\b.*\bhow\b.* ... \blife\b      (flags: g, i)
// A text therefore only matches when it contains ALL of those words in that
// order. None of the sample texts do, so match() returns null for each of them
// and every matchedText falls back to []. The pattern has no way to match a
// partial run of 3+ consecutive words.
// NOTE(review): the words are inserted into the pattern unescaped, so any regex
// metacharacters in textToCompare would be interpreted as pattern syntax.
let regex = new RegExp("\\b" + textToCompare.split(" ").join("\\b.*\\b") + "\\b", "gi");
// Produce one { id, matchedText } record per candidate text.
let output = textsToCompareWith.map(textObj => {
// Match against each element in the array
let matchedText = textObj?.text.match(regex);
console.log(matchedText);
return {
id: textObj.id,
matchedText: matchedText ? matchedText : [] // Return an empty array if no match is found
};
});
console.log(output);
Run Code Online (Sandbox Code Playgroud)
你可以将两段文本的单词逐一相互比较,并记录每个匹配结束的位置(即最后一个单词),以免重复输出同一匹配的后半部分。
/**
 * Finds runs of consecutive words shared by two word lists.
 *
 * Every pair of positions (i in w1, j in w2) holding the same word is extended
 * forward for as long as the words keep matching. Runs of at least `minWords`
 * words are reported as a space-joined phrase taken from w2.
 *
 * Runs are tracked by the position in w2 at which they end, so the shorter
 * tails of an already reported run are not reported again. If a longer run is
 * later found that ends at the same position, it replaces the shorter phrase
 * instead of being discarded.
 *
 * @param {string[]} w1 - Words of the reference text.
 * @param {string[]} w2 - Words of the text being searched.
 * @param {number} [minWords=3] - Minimum number of consecutive words to report.
 * @returns {string[]} The matched phrases.
 */
const compare = (w1, w2, minWords = 3) => {
  const result = [];
  // End position in w2 -> { slot: index in result, length: words in that run }.
  const longestByEnd = new Map();

  for (let i = 0; i < w1.length; i++) {
    for (let j = 0; j < w2.length; j++) {
      if (w1[i] !== w2[j]) continue;

      // Extend the run while both lists have words left and they agree.
      let k = 1;
      while (i + k < w1.length && j + k < w2.length && w1[i + k] === w2[j + k]) {
        k++;
      }
      if (k < minWords) continue;

      const end = j + k;
      const seen = longestByEnd.get(end);
      if (seen === undefined) {
        longestByEnd.set(end, { slot: result.length, length: k });
        result.push(w2.slice(j, end).join(' '));
      } else if (k > seen.length) {
        // A shorter run ending here was recorded first; keep the longer one.
        seen.length = k;
        result[seen.slot] = w2.slice(j, end).join(' ');
      }
    }
  }
  return result;
};

const lower = (s) => s.toLowerCase();

/**
 * Splits text into lowercase words, where a word is a run of `\w` characters
 * (ASCII letters, digits and underscore). Returns an empty list when the text
 * contains no such characters, because `match` yields null in that case.
 *
 * @param {string} text
 * @returns {string[]}
 */
const tokenize = (text) => (text.match(/\w+/g) ?? []).map(lower);

const textToCompare = "Hello there how are you doing with your life";
const textsToCompareWith = [
  { id: 1, text: "Hope you are doing good with your life" },
  { id: 2, text: "what are you doing with your life. hello there how are you" },
  { id: 3, text: "hello there mate" },
];

const words = tokenize(textToCompare);
const result = textsToCompareWith.map(({ id, text }) => ({
  id,
  matchedText: compare(words, tokenize(text)),
}));
console.log(result);
Run Code Online (Sandbox Code Playgroud)
.as-console-wrapper { max-height: 100% !important; top: 0; }
Run Code Online (Sandbox Code Playgroud)
另一种略有不同的方法:跳过已经被某个匹配用过的单词。
/**
 * Finds runs of consecutive words shared by two word lists, never starting a
 * new run on a word of w2 that an earlier reported run already covers.
 *
 * Every pair of positions (i in w1, j in w2) holding the same word is extended
 * forward for as long as the words keep matching. A run of at least `minWords`
 * words is reported as a space-joined phrase taken from w2, and all w2
 * positions it covers are marked as used so they are skipped as starting
 * points from then on.
 *
 * @param {string[]} w1 - Words of the reference text.
 * @param {string[]} w2 - Words of the text being searched.
 * @param {number} [minWords=3] - Minimum number of consecutive words to report.
 * @returns {string[]} The matched phrases, in the order they were found.
 */
const compare = (w1, w2, minWords = 3) => {
  const result = [];
  // Positions in w2 that already belong to a reported run.
  const used = new Set();

  for (let i = 0; i < w1.length; i++) {
    for (let j = 0; j < w2.length; j++) {
      if (used.has(j) || w1[i] !== w2[j]) continue;

      // Extend the run while both lists have words left and they agree.
      let k = 1;
      while (i + k < w1.length && j + k < w2.length && w1[i + k] === w2[j + k]) {
        k++;
      }
      if (k < minWords) continue;

      result.push(w2.slice(j, j + k).join(' '));
      for (let p = j; p < j + k; p++) {
        used.add(p);
      }
    }
  }
  return result;
};

const lower = (s) => s.toLowerCase();

/**
 * Splits text into lowercase words, where a word is a run of `\w` characters
 * (ASCII letters, digits and underscore). Returns an empty list when the text
 * contains no such characters, because `match` yields null in that case.
 *
 * @param {string} text
 * @returns {string[]}
 */
const tokenize = (text) => (text.match(/\w+/g) ?? []).map(lower);

const textToCompare = "Hello there how are you doing with your life";
const textsToCompareWith = [
  { id: 1, text: "Hope you are doing good with your life" },
  { id: 2, text: "what are you doing with your life. hello there how are you" },
  { id: 3, text: "hello there mate" },
];

const words = tokenize(textToCompare);
const result = textsToCompareWith.map(({ id, text }) => ({
  id,
  matchedText: compare(words, tokenize(text)),
}));
console.log(result);
Run Code Online (Sandbox Code Playgroud)
.as-console-wrapper { max-height: 100% !important; top: 0; }
Run Code Online (Sandbox Code Playgroud)