阅读文档并完成教程后,我想做一个小演示,结果发现我的模型根本训练不起来。代码如下:
import spacy
import random
import json
# Toy corpus: each item pairs a sentence with a {"KAT": bool} flag that says
# whether the sentence is about cats.
TRAINING_DATA = [
    ["My little kitty is so special", {"KAT": True}],
    ["Dude, Totally, Yeah, Video Games", {"KAT": False}],
    ["Should I pay $1,000 for the iPhone X?", {"KAT": False}],
    ["The iPhone 8 reviews are here", {"KAT": False}],
    ["Noa is a great cat name.", {"KAT": True}],
    ["We got a new kitten!", {"KAT": True}],
]

# Blank English pipeline holding a single text-categorizer component.
nlp = spacy.blank("en")
category = nlp.create_pipe("textcat")
nlp.add_pipe(category)
category.add_label("KAT")

# Prepare the pipeline for training.
nlp.begin_training()

# Make 100 passes over the data and report the losses on every 20th pass.
for itn in range(100):
    # Present the examples in a new order on each pass.
    random.shuffle(TRAINING_DATA)
    losses = {}

    # Feed the corpus to the model two examples at a time.
    for batch in spacy.util.minibatch(TRAINING_DATA, size=2):
        texts = []
        annotations = []
        for sentence, flags in batch:
            texts.append(sentence)
            # NOTE(review): the annotations are keyed by "textcat" here, while
            # the working answer further down this page uses {"cats": ...};
            # that is presumably why the reported loss stays at 0.0 -- confirm
            # against the spaCy 2 GoldParse documentation.
            annotations.append({"textcat": [flags]})
        nlp.update(texts, annotations, losses=losses)

    if itn % 20 == 0:
        print(losses)
Run Code Online (Sandbox Code Playgroud)
运行之后,输出表明模型几乎什么都没学到。
{'textcat': 0.0}
{'textcat': 0.0}
{'textcat': 0.0}
{'textcat': 0.0}
{'textcat': 0.0}
Run Code Online (Sandbox Code Playgroud)
这感觉不对:要么应该报错,要么应该得到有意义的标签。预测结果也证实了这一点。
import spacy
import random
import json
# Toy corpus: each item pairs a sentence with a {"KAT": bool} flag that says
# whether the sentence is about cats.
TRAINING_DATA = [
    ["My little kitty is so special", {"KAT": True}],
    ["Dude, Totally, Yeah, Video Games", {"KAT": False}],
    ["Should I pay $1,000 for the iPhone X?", {"KAT": False}],
    ["The iPhone 8 reviews are here", {"KAT": False}],
    ["Noa is a great cat name.", {"KAT": True}],
    ["We got a new kitten!", {"KAT": True}],
]

# Blank English pipeline holding a single text-categorizer component.
nlp = spacy.blank("en")
category = nlp.create_pipe("textcat")
nlp.add_pipe(category)
category.add_label("KAT")

# Prepare the pipeline for training.
nlp.begin_training()

# Make 100 passes over the data and report the losses on every 20th pass.
for itn in range(100):
    # Present the examples in a new order on each pass.
    random.shuffle(TRAINING_DATA)
    losses = {}

    # Feed the corpus to the model two examples at a time.
    for batch in spacy.util.minibatch(TRAINING_DATA, size=2):
        texts = []
        annotations = []
        for sentence, flags in batch:
            texts.append(sentence)
            # NOTE(review): the annotations are keyed by "textcat" here, while
            # the working answer further down this page uses {"cats": ...};
            # that is presumably why the reported loss stays at 0.0 -- confirm
            # against the spaCy 2 GoldParse documentation.
            annotations.append({"textcat": [flags]})
        nlp.update(texts, annotations, losses=losses)

    if itn % 20 == 0:
        print(losses)
Run Code Online (Sandbox Code Playgroud)
感觉我的代码里缺少了什么,但我不知道缺的是什么。
如果您已经升级到 spaCy 3,上面的代码将不再有效。解决方案是做一些迁移所需的更改。我据此修改了 cantdutchthis 的示例。
变更摘要:
- `add_pipe` 的用法略有变化
- `nlp.update` 现在需要一个 `Example` 对象,而不是 (text, annotation) 元组

import spacy
# Add imports for example, as well as textcat config...
from spacy.training import Example
from spacy.pipeline.textcat import single_label_bow_config, single_label_default_config
from thinc.api import Config
import random
# Labels are one-hot encoded: every text marks exactly one of the two
# categories as True (KAT0 for the cat sentences, KAT1 for the others).
TRAINING_DATA = [
    ["My little kitty is so special", {"KAT0": True}],
    ["Dude, Totally, Yeah, Video Games", {"KAT1": True}],
    ["Should I pay $1,000 for the iPhone X?", {"KAT1": True}],
    ["The iPhone 8 reviews are here", {"KAT1": True}],
    ["Noa is a great cat name.", {"KAT0": True}],
    ["We got a new kitten!", {"KAT0": True}],
]

# Model configuration for the text categorizer. Swap in the commented-out
# line below to use the bag-of-words model instead of the default one.
# bow
# config = Config().from_str(single_label_bow_config)
# textensemble with attention
config = Config().from_str(single_label_default_config)

nlp = spacy.blank("en")
# spaCy 3: `add_pipe` takes the component name and returns the component, so
# the separate `create_pipe` step from spaCy 2 is gone.
category = nlp.add_pipe("textcat", last=True, config=config)
category.add_label("KAT0")
category.add_label("KAT1")

# spaCy 3 renamed `begin_training` to `initialize`; the old name survives only
# as a deprecated alias that emits a warning.
nlp.initialize()

# Make 100 passes over the data and report the losses on every 20th pass.
for itn in range(100):
    # Present the examples in a new order on each pass.
    random.shuffle(TRAINING_DATA)
    losses = {}

    # Feed the corpus to the model four examples at a time.
    for batch in spacy.util.minibatch(TRAINING_DATA, size=4):
        # spaCy 3: `nlp.update` consumes Example objects (an unannotated Doc
        # plus its reference annotations) rather than text/annotation tuples.
        examples = [
            Example.from_dict(nlp.make_doc(text), {"cats": cats})
            for text, cats in batch
        ]
        nlp.update(examples, losses=losses)

    if itn % 20 == 0:
        print(losses)
Run Code Online (Sandbox Code Playgroud)
根据 Ines 的评论,这就是答案。
import spacy
import random
import json
# Toy corpus: each item pairs a sentence with a {"KAT": bool} flag that says
# whether the sentence is about cats.
TRAINING_DATA = [
    ["My little kitty is so special", {"KAT": True}],
    ["Dude, Totally, Yeah, Video Games", {"KAT": False}],
    ["Should I pay $1,000 for the iPhone X?", {"KAT": False}],
    ["The iPhone 8 reviews are here", {"KAT": False}],
    ["Noa is a great cat name.", {"KAT": True}],
    ["We got a new kitten!", {"KAT": True}],
]

# Blank English pipeline; the label is registered on the text categorizer
# before the component is added to the pipeline.
nlp = spacy.blank("en")
category = nlp.create_pipe("textcat")
category.add_label("KAT")
nlp.add_pipe(category)

# Prepare the pipeline for training.
nlp.begin_training()

# Make 100 passes over the data and report the losses on every 20th pass.
for itn in range(100):
    # Present the examples in a new order on each pass.
    random.shuffle(TRAINING_DATA)
    losses = {}

    # Feed the corpus to the model one example at a time.
    for batch in spacy.util.minibatch(TRAINING_DATA, size=1):
        texts = []
        annotations = []
        for sentence, flags in batch:
            texts.append(nlp(sentence))
            # The gold categories go under the "cats" key.
            annotations.append({"cats": flags})
        nlp.update(texts, annotations, losses=losses)

    if itn % 20 == 0:
        print(losses)
Run Code Online (Sandbox Code Playgroud)