Vik*_*arg 1 python ubuntu ipython python-2.7
我正在研究以下类型的数据.
itemid category subcategory title
1 10000010 ????????? ?????????? ? ???????? Toyota Sera, 1991
2 10000025 ?????? ??????????? ????? ?????? ??????
3 10000094 ?????? ???? ??????, ?????, ?????????? ?????? Steilmann
4 10000101 ????????? ?????????? ? ???????? Ford Focus, 2011
5 10000132 ????????? ???????? ? ?????????? ??????? 3.0 Bar
6 10000152 ????????? ?????????? ? ???????? ??? 2115 Samara, 2005
现在我运行以下命令
import pandas as pd
trainingData = pd.read_table("train.tsv",nrows=10, header=0,encoding='utf-8')
trainingData['itemid'].head()
0 10000010
1 10000025
2 10000094
3 10000101
4 10000132
Name: itemid
到这里一切正常,但是当我选取多列并显示时,例如:
trainingData[['itemid','category']].head()
Error:
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
/home/vikram/Documents/Avito/ in ()
----> 1 trainingData[['itemid','category']].head()
/usr/lib/python2.7/dist-packages/IPython/core/displayhook.pyc in __call__(self, result)
236 self.start_displayhook()
237 self.write_output_prompt()
--> 238 format_dict = self.compute_format_data(result)
239 self.write_format_data(format_dict)
240 self.update_user_ns(result)
/usr/lib/python2.7/dist-packages/IPython/core/displayhook.pyc in compute_format_data(self, result)
148 MIME type representation of the object.
149 """
--> 150 return self.shell.display_formatter.format(result)
151
152 def write_format_data(self, format_dict):
/usr/lib/python2.7/dist-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
124 continue
125 try:
--> 126 data = formatter(obj)
127 except:
128 # FIXME: log the exception
/usr/lib/python2.7/dist-packages/IPython/core/formatters.pyc in __call__(self, obj)
445 type_pprinters=self.type_printers,
446 deferred_pprinters=self.deferred_printers)
--> 447 printer.pretty(obj)
448 printer.flush()
449 return stream.getvalue()
/usr/lib/python2.7/dist-packages/IPython/lib/pretty.pyc in pretty(self, obj)
352 if callable(obj_class._repr_pretty_):
353 return obj_class._repr_pretty_(obj, self, cycle)
--> 354 return _default_pprint(obj, self, cycle)
355 finally:
356 self.end_group()
/usr/lib/python2.7/dist-packages/IPython/lib/pretty.pyc in _default_pprint(obj, p, cycle)
472 if getattr(klass, '__repr__', None) not in _baseclass_reprs:
473 # A user-provided repr.
--> 474 p.text(repr(obj))
475 return
476 p.begin_group(1, '<')
/usr/lib/pymodules/python2.7/pandas/core/frame.pyc in __repr__(self)
--> 456 self.to_string(buf=buf)
457 value = buf.getvalue()
458 if max([len(l) for l in value.split('\n')]) > terminal_width:
/usr/lib/pymodules/python2.7/pandas/core/frame.pyc in to_string(self, buf, columns, col_space, colSpace, header, index, na_rep, formatters, float_format, sparsify, nanRep, index_names, justify, force_unicode)
1024 index_names=index_names,
1025 header=header, index=index)
-> 1026 formatter.to_string(force_unicode=force_unicode)
1027
1028 if buf is None:
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in to_string(self, force_unicode)
176 for i, c in enumerate(self.columns):
177 if self.header:
--> 178 fmt_values = self._format_col(c)
179 cheader = str_columns[i]
180 max_len = max(max(len(x) for x in fmt_values),
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in _format_col(self, col)
217 float_format=self.float_format,
218 na_rep=self.na_rep,
--> 219 space=self.col_space)
220
221 def to_html(self):
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in format_array(values, formatter, float_format, na_rep, digits, space, justify)
424 justify=justify)
425
--> 426 return fmt_obj.get_result()
427
428
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in get_result(self)
471 fmt_values.append(float_format(v))
472 else:
--> 473 fmt_values.append(' %s' % _format(v))
474
475 return _make_fixed_width(fmt_values, self.justify)
/usr/lib/pymodules/python2.7/pandas/core/format.pyc in _format(x)
457 else:
458 # object dtype
--> 459 return '%s' % formatter(x)
460
461 vals = self.values
/usr/lib/pymodules/python2.7/pandas/core/common.pyc in _stringify(col)
503 def _stringify(col):
504 # unicode workaround
--> 505 return unicode(col)
506
507 def _maybe_make_list(obj):
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 0: ordinal not in range(128)
请帮我正确"显示"数据.
我也遇到过同样的问题:IPython无法显示Pandas的head()函数返回的非ASCII文本.原因是在我的机器上,Python的默认编码被设置为'ascii'.你可以用下面的代码检查当前的默认编码:
import sys
sys.getdefaultencoding()
Run Code Online (Sandbox Code Playgroud)
解决方案是将默认编码重新设置为UTF-8:
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
Run Code Online (Sandbox Code Playgroud)
在此之后,IPython正确显示了带有非ASCII字符的Pandas数据帧.
请注意,必须先调用reload(sys),setdefaultencoding函数才可用:Python启动时,site模块会把这个函数从sys模块中删除,重新加载sys才能把它恢复.如果不调用reload,你会得到如下错误:
AttributeError: 'module' object has no attribute 'setdefaultencoding'
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
1221 次 |
| 最近记录: |