我通过转换情绪分析脚本来使用它们来教我自己(可能是我的第一个错误)类和方法.
我以为我已经掌握了所有方法,但我一直在努力
global name 'get_bigram_word_feats' is not defined
我相信get_word_feats
如果它走得那么远,我也会得到一个错误.
我正在撞击这个伟大的时间.我尝试删除staticmethod
并添加自我.我究竟做错了什么?
这是我的代码:
def word_feats(words):
return dict([(word, True) for word in words])
class SentClassifier:
def __init__(self, name, location):
self.name = name
self.location = location
self.fullpath = location + "/" + name
def doesexist(self):
return os.path.isfile(self.fullpath)
def save_classifier(self):
rf = open(self.fullpath, 'wb')
pickle.dump(self.fullpath, rf)
rf.close()
def load_classifier(self):
sf = open(self.fullpath, 'rb')
sclassifier = pickle.load(sf)
sf.close()
return sclassifier
class Training:
def __init__(self, neg, pos):
self.neg = neg
self.pos = pos
self.negids = open(self.neg, 'rb').read().splitlines(True)
self.posids = open(self.pos, 'rb').read().splitlines(True)
self.exclude = set(string.punctuation)
self.exclude = self.exclude, '...'
self.swords = stopwords.words('english')
def tokens(self, words):
words = [w for w in nltk.word_tokenize(words) if w not in self.exclude and len(w) > 1
and w not in self.swords and wordnet.synsets(w)]
return words
def idlist(self, words):
thisidlist = [self.tokens(tf) for tf in words]
return thisidlist
@staticmethod
def get_word_feats(words):
return dict([(word, True) for word in words])
@staticmethod
def get_bigram_word_feats(twords, score_fn=BigramAssocMeasures.chi_sq, tn=200):
words = [w for w in twords]
bigram_finder = BigramCollocationFinder.from_words(words)
bigrams = bigram_finder.nbest(score_fn, tn)
return dict([(ngram, True) for ngram in itertools.chain(words, bigrams)])
@staticmethod
def label_feats(thelist, label):
return [(get_word_feats(lf), label) for lf in thelist]
@staticmethod
def label_grams(thelist, label):
return [(get_bigram_word_feats(gf), label) for gf in thelist()]
@staticmethod
def combinegrams(grams, feats):
for g in grams():
feats.append(g)
return feats
def negidlist(self):
return self.idlist(self.negids)
def posidlist(self):
return self.idlist(self.posids)
def posgrams(self):
return self.label_grams(self.posidlist, 'pos')
def neggrams(self):
return self.label_grams(self.negidlist, 'neg')
def negwords(self):
return self.label_feats(self.negidlist, 'neg')
def poswords(self):
return self.label_feats(self.posidlist, 'pos')
def negfeats(self):
return self.combinegrams(self.neggrams, self.negwords)
def posfeats(self):
return self.combinegrams(self.posgrams, self.poswords)
starttime = time.time()
myclassifier = SentClassifier("sentanalyzer.pickle", "classifiers")
if myclassifier.doesexist() is False:
print "training new classifier"
trainset = Training('data/neg.txt', 'data/pos.txt')
negfeats = trainset.negfeats()
posfeats = trainset.posfeats()
negcutoff = len(negfeats) * 8 / 10
poscutoff = len(posfeats) * 8 / 10
trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))
classifier = NaiveBayesClassifier.train(trainfeats)
print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
myclassifier.save_classifier()
else:
print "using existing classifier"
classifier = myclassifier.load_classifier()
classifier.show_most_informative_features(20)
mystr = "16 steps to an irresistible sales pitch, via @vladblagi: slidesha.re/1bVV7OS"
myfeat = word_feats(nltk.word_tokenize(mystr))
print classifier.classify(myfeat)
probd = classifier.prob_classify(myfeat)
print probd.prob('neg')
print probd.prob('pos')
donetime = time.time() - starttime
print donetime
Run Code Online (Sandbox Code Playgroud)
您需要的所有信息都在异常消息中:
全局名称“get_bigram_word_feats”未定义
(我的重点)
Python 不明白您想要从类访问该方法,因为您没有指定类名作为方法调用的一部分。因此,它正在全局命名空间中寻找该函数,但未能找到。
如果您还记得调用实例方法,则需要在方法前面加上前缀 ,self.
以使 Python 解释器在正确的位置查找,这也适用于静态方法,尽管您不指定self.
,而是指定类名。
因此,要解决此问题,请在方法调用前加上类名:
return [(Training.get_bigram_word_feats(gf), label) for gf in thelist()]
^---+---^
|
+-- you need this part
Run Code Online (Sandbox Code Playgroud)
归档时间: |
|
查看次数: |
5947 次 |
最近记录: |