sso*_*nez 1 php utf-8 preg-replace
我当前正在从数据库中检索数据。我收到一个数组,其中包含带有 UTF8 编码值的“标题”索引。我想要的是使用这个值作为保存某些内容的文件的名称,所以我这样做:
\n\nfile_put_contents($filename, $content);\nRun Code Online (Sandbox Code Playgroud)\n\n其中 $filename 为:
\n\n\'-\' . $category[\'root\'] . \'-articles-\' . $category[\'id\'] . \'-\' . $this->Urlize($category[\'category\'])\nRun Code Online (Sandbox Code Playgroud)\n\n这是“Urlize”的代码:
\n\nprivate function Urlize($value)\n{\n if ($value != null && trim($value) != \'\')\n {\n $value = preg_replace(\'/([\\[\\(].*[\\]\\)])/i\', \'\', $value);\n $value = preg_replace(\'/[\\s]/i\', \'-\', $value);\n $value = preg_replace(\'/[,!?.;:\\"\\\'&+\\/]/i\', \'-\', $value);\n $value = preg_replace(\'/[-]+/i\', \'-\', $value);\n $value = preg_replace(\'/(^-)/i\', \'\', $value);\n $value = preg_replace(\'/-$/i\', \'\', $value);\n $value = preg_replace(\'/[\xc3\xa9\xc3\xa8\xc3\xaa]/i\', \'e\', $value);\n $value = preg_replace(\'/[\xc3\xa2\xc3\xa0]/i\', \'a\', $value);\n $value = preg_replace(\'/[\xc3\xb6\xc3\xb4]/i\', \'o\', $value);\n $value = preg_replace(\'/[\xc3\xbb\xc3\xb9\xc3\xbc]/i\', \'u\', $value);\n $value = preg_replace(\'/[\xc3\xae\xc3\xaf\xc3\xad\xc3\xac]/i\', \'i\', $value);\n $value = preg_replace(\'/[#]/i\', \'sharp\', $value);\n $value = preg_replace(\'/[<>]/i\', \'-\', $value);\n\n if ($value[strlen($value) - 1] == \'-\')\n {\n $value = substr($value, 0, strlen($value) - 1);\n }\n }\n\n return strtolower($value);\n}\nRun Code Online (Sandbox Code Playgroud)\n\n我的问题是,对于标题“Théorie générale”,我得到的是“theeorie-geeneerale”,也就是“e”被重复了一次。我猜这与字符集有关,但我找不到避免它的好方法。当然,我想要的结果是“theorie-generale”。
\n\n谢谢
\n在正则表达式中使用 Unicode 字符时,需要加上 Unicode 修饰符 u。
所以尝试:
\n\nfunction Urlize($value) {\n if ($value != null && trim($value) != \'\')\n {\n $value = preg_replace(\'/([\\[\\(].*[\\]\\)])/i\', \'\', $value);\n $value = preg_replace(\'/[\\s]/i\', \'-\', $value);\n $value = preg_replace(\'/[,!?.;:\\"\\\'&+\\/]/i\', \'-\', $value);\n $value = preg_replace(\'/[-]+/i\', \'-\', $value);\n $value = preg_replace(\'/(^-)/i\', \'\', $value);\n $value = preg_replace(\'/-$/i\', \'\', $value);\n $value = preg_replace(\'/[\xc3\xa9\xc3\xa8\xc3\xaa]/iu\', \'e\', $value);\n $value = preg_replace(\'/[\xc3\xa2\xc3\xa0]/iu\', \'a\', $value);\n $value = preg_replace(\'/[\xc3\xb6\xc3\xb4]/iu\', \'o\', $value);\n $value = preg_replace(\'/[\xc3\xbb\xc3\xb9\xc3\xbc]/ui\', \'u\', $value);\n $value = preg_replace(\'/[\xc3\xae\xc3\xaf\xc3\xad\xc3\xac]/ui\', \'i\', $value);\n $value = preg_replace(\'/[#]/i\', \'sharp\', $value);\n $value = preg_replace(\'/[<>]/i\', \'-\', $value);\n\n if ($value[strlen($value) - 1] == \'-\')\n {\n $value = substr($value, 0, strlen($value) - 1);\n }\n }\n return strtolower($value);\n}\necho Urlize(\'Th\xc3\xa9orie g\xc3\xa9n\xc3\xa9rale\');\nRun Code Online (Sandbox Code Playgroud)\n\n演示:http://sandbox.onlinephpfunctions.com/code/3b7e5985dc23ac71a6298783d2dad646d875d3c8
\n\n输出:
\n\n\n\n\ntheorie-generale
\n
您可以使用|(或)分组来减少 preg_replace 的数量。但这可能会让你的正则表达式更难阅读。您还可以使用数组进行查找和替换。这是第一种方法。
/**
 * Convert a UTF-8 title into a lowercase, hyphen-separated slug suitable for
 * use inside a file name or URL.
 *
 * Separator handling is folded into a few preg_replace() calls by means of
 * alternation (`|`); a fixed set of accented Latin letters is then reduced to
 * its ASCII base letter.
 *
 * @param mixed $value Title to convert; it is cast to string. Expected to be UTF-8.
 * @return string The slug, or '' when the input is null, blank, or not valid UTF-8.
 */
function Urlize($value) {
    $value = (string) $value;

    // Patterns carrying the `u` modifier return null for malformed UTF-8, so
    // such input (and blank input) is rejected once, before any replacement.
    if (trim($value) === '' || preg_match('//u', $value) !== 1) {
        return '';
    }

    // Remove bracketed or parenthesised text. The match is greedy: it runs
    // from the first opener to the last closer on the same line.
    $value = preg_replace('/[\[\(].*[\]\)]/u', '', $value);

    // Turn every run of whitespace, punctuation, angle brackets and hyphens
    // into ONE hyphen. The `+` applies to the whole group, so mixed runs such
    // as ", " collapse as well.
    $value = preg_replace('/(?:[,!?.;:"\'&+\/<>-]|\s)+/u', '-', $value);

    // Trim hyphens only now, after the separators have been generated;
    // trimming earlier would miss hyphens produced by the step above.
    $value = preg_replace('/^-+|-+$/', '', $value);

    // Fold accented letters to ASCII. `u` makes PCRE treat each letter as one
    // code point instead of two bytes (matching byte-wise is what doubles the
    // replacement), and `i` also covers the upper-case forms.
    $value = preg_replace('/[\x{E9}\x{E8}\x{EA}]/iu', 'e', $value);
    $value = preg_replace('/[\x{E2}\x{E0}]/iu', 'a', $value);
    $value = preg_replace('/[\x{F6}\x{F4}]/iu', 'o', $value);
    $value = preg_replace('/[\x{FB}\x{F9}\x{FC}]/iu', 'u', $value);
    $value = preg_replace('/[\x{EE}\x{EF}\x{ED}\x{EC}]/iu', 'i', $value);

    $value = preg_replace('/#/', 'sharp', $value);

    return strtolower($value);
}
echo Urlize("Th\xc3\xa9orie g\xc3\xa9n\xc3\xa9rale");