使用python和pydev解析twitter feed

-1 python

使用这里的代码我得到一个错误 - http://sentdex.com/sentiment-analysisbig-data-and-python-tutorials-algorithmic-trading/how-to-parse-twitter-code-and-tutorial/

代码是

 import re
 from re import sub
 import time
 import cookielib
  from cookielib import CookieJar
 import urllib2
 from urllib2 import urlopen
 import difflib
 cj = CookieJar()
 opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
 opener.addheaders = [('User-agent', 'Mozilla/5.0')]


 startingLink = ‘https://twitter.com/search/realtime?q=’

 def twitParser():

 oldTwit = [] newTwit = []

 howSimAr = [.5,.5,.5,.5,.5]

 while 1 < 2:

 try:

 sourceCode = opener.open(‘https://twitter.com/search/realtime?q=’+keyWord+‘&      src=hash’).read()

splitSource = re.findall(r’<p class=”js-tweet-text tweet-text”>(.*?)</p>’,sourceCode)

 for item in splitSource:

 print ”
 print ”
 print ”
 print ‘ __________________________’
 aTweet = re.sub(r’<.*?>’,”,item)
 print aTweet
 newTwit.append(aTweet)

comparison = difflib.SequenceMatcher(None, newTwit, oldTwit)
howSim = comparison.ratio()
 print ‘#############’
 print ‘This selection is’,howSim,‘similar to the past’
howSimAr.append(howSim)
howSimAr.remove(howSimAr[0])

 waitMultiplier = reduce(lambda x, y: x+y, howSimAr)/len(howSimAr)

 print ”
 print ‘The current similarity array:’,howSimAr
 print ‘Our current Multiplier:’, waitMultiplier
 print ‘###############’

 oldTwit = [None]

for eachItem in newTwit:

oldTwit.append(eachItem)

newTwit = [None]

time.sleep(waitMultiplier*45)

except Exception, e:
print str(e)
print ‘error in the main try’
time.sleep(555)



 twitParser()
Run Code Online (Sandbox Code Playgroud)

我得到错误 -

File "C:\Users\thisismypc\workspace\hithere\hithere", line 16
SyntaxError: Non-ASCII character '\xe2' in file C:\Users\thisismypc\workspace\hithere\hithere on line 16, but no encoding declared; 
see http://python.org/dev/peps/pep-0263/ for details
Run Code Online (Sandbox Code Playgroud)

小智 5

看起来您使用的是'和'字符,它们是真正的非ASCII字符.尝试分别将它们替换为'和'.