Sal*_*ali 1 mysql encoding utf-8
我有一个数据库,其中一些元素由HTML特殊字符组成:
| Universidad Tecnológica Nacional - UTN                                                  |
| Instituto Tecnológico de Buenos Aires                                                   |
| Instituto Superior del Profesorado "Dr. Joaquín V. González" |
| Escuela Nacional de Náutica "Manuel Belgrano"                         |
| Conservatorio Nacional de Música "Carlos López Buchardo"     |
| Instituto Argentino de Computacion - IAC                                                         |
| Conservatorio de Superior de Música "Manuel de Falla"                 |
我需要将其转换为正确的UTF格式.我可以做的不仅仅是迭代数据库,并从每个代码到等效符号的映射?
á -> 'á'
" -> '"'
...
正如我在上面的评论中所提到的,在你自己的案例中你真正想要做的事情是非常不清楚的.
我可以做的不仅仅是迭代数据库,并从每个代码到等效符号的映射?
嗯,是.您可以用替换字符替换字符代码实体(例如{和ƫ),而无需在"映射"中查找字符代码.但是"总是需要查找命名实体(例如).
这是我尝试解决一般情况:
创建一个表来存储HTML中定义的命名字符实体:
CREATE TABLE ents (
  ref VARCHAR(8) NOT NULL COLLATE utf8_bin,
  rep CHAR(1)    NOT NULL,
  PRIMARY KEY (ref)
);
填充此表 - 我建议使用脚本,例如来自PHP:
$dbh = new PDO("mysql:dbname=$dbname", $username, $password);
$dbh->setAttribute(PDO::ATTR_EMULATE_PREPARES, FALSE);
$ins = $dbh->prepare('INSERT INTO ents (ref, rep) VALUES (?, ?)');
$t = get_html_translation_table(HTML_ENTITIES);
foreach ($t as $k => $v) $ins->execute([substr($v, 1, -1), $k]);
定义一个SQL函数来执行实体替换(在适用的情况下使用此表,或者使用字符代码):
DELIMITER ;;
CREATE FUNCTION dhe(s TEXT) RETURNS TEXT
BEGIN
  DECLARE n, p, i, t INT DEFAULT 0;
  DECLARE r VARCHAR(12);
  entity_search: LOOP
    SET n := LOCATE('&', s, n+1);
    IF (!n) THEN
      LEAVE entity_search;
    END IF;
    IF (SUBSTRING(s, n+1, 1) = '#') THEN
      CASE
        WHEN SUBSTRING(s, n+2, 1) RLIKE '[[:digit:]]' THEN
          SET t := 2, p := n+2, r := '[[:digit:]]';
        WHEN SUBSTRING(s, n+2, 1) = 'x' THEN
          SET t := 3, p := n+3, r := '[[:xdigit:]]';
        ELSE ITERATE entity_search;
      END CASE;
    ELSE
      SET t := 1, p := n+1, r := '[[:alnum:]_]';
    END IF;
    SET i := 0;
    reference: LOOP
      IF SUBSTRING(s, p+i, 1) NOT RLIKE r THEN
        IF SUBSTRING(s, p+i, 1) RLIKE '[[:alnum:]_]' THEN
          ITERATE entity_search;
        END IF;
        LEAVE reference;
      END IF;
      IF i = 8 THEN ITERATE entity_search; END IF;
      SET i := i + 1;
    END LOOP reference;
    SET s := CONCAT(
      LEFT(s, n-1),
      CASE t
        WHEN 1 THEN COALESCE(
          (SELECT rep FROM ents WHERE ref = SUBSTRING(s, p, i))
        , SUBSTRING(s, n, i + IF(SUBSTRING(s, p+i, 1)=';',1,0))
        )
        WHEN 2 THEN CHAR(SUBSTRING(s, p, i))
        WHEN 3 THEN CHAR(CONV(SUBSTRING(s, p, i), 16, 10))
      END,
      SUBSTRING(s, p + i + IF(SUBSTRING(s, p+i, 1)=';',1,0))
    );
  END LOOP entity_search;
  RETURN s;
END;;
DELIMITER ;
应用此函数两次以解码您的(显然)双重编码表:
UPDATE my_table SET my_column = dhe(dhe(my_column));