我是python新手,需要帮助!我正在练习python NLTK文本分类.以下是我在http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/上练习的代码示例
我试过这个
from nltk import bigrams
from nltk.probability import ELEProbDist, FreqDist
from nltk import NaiveBayesClassifier
from collections import defaultdict
train_samples = {}
with file ('positive.txt', 'rt') as f:
for line in f.readlines():
train_samples[line]='pos'
with file ('negative.txt', 'rt') as d:
for line in d.readlines():
train_samples[line]='neg'
f=open("test.txt", "r")
test_samples=f.readlines()
def bigramReturner(text):
tweetString = text.lower()
bigramFeatureVector = {}
for item in bigrams(tweetString.split()):
bigramFeatureVector.append(' '.join(item))
return bigramFeatureVector
def get_labeled_features(samples):
word_freqs = {}
for text, label in train_samples.items():
tokens = text.split()
for …Run Code Online (Sandbox Code Playgroud)