I referred to amueller's wordcloud and a few other word cloud examples. Here is the situation:
#!c:/Python27/python.exe
# coding: UTF-8
from os import path
from wordcloud import WordCloud
import MeCab as mc

# Read the Japanese source text and decode it to unicode (Python 2).
d = path.dirname("C:\\Users\\BobLeponge\\Desktop\\jpn\\JPNTEXT.txt")
text = open(path.join(d, 'JPNTEXT.txt')).read()
text = text.decode("utf-8")

def mecab_analysis(text):
    # Tokenize with MeCab and keep only adjectives, verbs, nouns and adverbs.
    t = mc.Tagger('-Ochasen -d/usr/local/Cellar/mecab/0.996/lib/mecab/dic/mecab-ipadic-neologd/')
    enc_text = text.encode('utf-8')
    node = t.parseToNode(enc_text)
    output = []
    while node:
        if node.surface != "":
            word_type = node.feature.split(",")[0]
            if word_type in ["形容詞", "動詞", "名詞", "副詞"]:
                output.append(node.surface)
        node = node.next
        if node is None:
            break
    return output

def create_wordcloud(text):
    fpath = "C:\\WINDOWS\\Fonts\\NotoSansMonoCJKjp-Regular.otf"
    stop_words = [u'てる', u'いる', u'なる', u'れる', u'する', u'ある', u'こと', u'これ', u'さん', u'して',
                  u'くれる', u'やる', …
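The code above is cut off in the middle of the stop_words list, so the part that actually builds the word cloud is not shown. For reference only, here is a minimal sketch of the usual pattern with the wordcloud API; it is not the original code, and the function name create_wordcloud_sketch, the output file name, the background color, and the width/height values are my assumptions. The font path is reused from the snippet above.

# Minimal sketch (assumptions: function name, output file, colors, sizes) of how
# the token list from mecab_analysis() is typically handed to WordCloud.
from wordcloud import WordCloud

def create_wordcloud_sketch(words, stop_words):
    # WordCloud.generate() expects one whitespace-separated string, not a list of tokens.
    joined = u" ".join(words)
    wc = WordCloud(
        font_path="C:\\WINDOWS\\Fonts\\NotoSansMonoCJKjp-Regular.otf",  # CJK-capable font from the question
        stopwords=set(stop_words),      # stop words are passed as a set of strings
        background_color="white",       # assumed
        width=800,                      # assumed
        height=600,                     # assumed
    ).generate(joined)
    wc.to_file("wordcloud.png")         # hypothetical output file name

# Hypothetical usage:
#   words = mecab_analysis(text)
#   create_wordcloud_sketch(words, stop_words)

Joining the tokens with spaces matters for Japanese: the text has no natural word boundaries, so WordCloud's built-in tokenizer only works once MeCab has already split the words.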