Aks*_*dal 28 python aggregate pandas
def stack_plot(data, xtick, col2='project_is_approved', col3='total'):
ind = np.arange(data.shape[0])
plt.figure(figsize=(20,5))
p1 = plt.bar(ind, data[col3].values)
p2 = plt.bar(ind, data[col2].values)
plt.ylabel('Projects')
plt.title('Number of projects aproved vs rejected')
plt.xticks(ind, list(data[xtick].values))
plt.legend((p1[0], p2[0]), ('total', 'accepted'))
plt.show()
def univariate_barplots(data, col1, col2='project_is_approved', top=False):
# Count number of zeros in dataframe python: /sf/answers/3607836501/
temp = pd.DataFrame(project_data.groupby(col1)[col2].agg(lambda x: x.eq(1).sum())).reset_index()
# Pandas dataframe grouby count: /sf/answers/1356991401/
temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'total':'count'})).reset_index()['total']
temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'Avg':'mean'})).reset_index()['Avg']
temp.sort_values(by=['total'],inplace=True, ascending=False)
if top:
temp = temp[0:top]
stack_plot(temp, xtick=col1, col2=col2, col3='total')
print(temp.head(5))
print("="*50)
print(temp.tail(5))
univariate_barplots(project_data, 'school_state', 'project_is_approved', False)
Run Code Online (Sandbox Code Playgroud)
错误:
SpecificationError Traceback (most recent call last)
<ipython-input-21-2cace8f16608> in <module>()
----> 1 univariate_barplots(project_data, 'school_state', 'project_is_approved', False)
<ipython-input-20-856fcc83737b> in univariate_barplots(data, col1, col2, top)
4
5 # Pandas dataframe grouby count: /sf/answers/1356991401/
----> 6 temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'total':'count'})).reset_index()['total']
7 print (temp['total'].head(2))
8 temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'Avg':'mean'})).reset_index()['Avg']
~\AppData\Roaming\Python\Python36\site-packages\pandas\core\groupby\generic.py in aggregate(self, func, *args, **kwargs)
251 # but not the class list / tuple itself.
252 func = _maybe_mangle_lambdas(func)
--> 253 ret = self._aggregate_multiple_funcs(func)
254 if relabeling:
255 ret.columns = columns
~\AppData\Roaming\Python\Python36\site-packages\pandas\core\groupby\generic.py in _aggregate_multiple_funcs(self, arg)
292 # GH 15931
293 if isinstance(self._selected_obj, Series):
--> 294 raise SpecificationError("nested renamer is not supported")
295
296 columns = list(arg.keys())
SpecificationError: **nested renamer is not supported**
Run Code Online (Sandbox Code Playgroud)
小智 47
改变
temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'total':'count'})).reset_index()['total']
temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg({'Avg':'mean'})).reset_index()['Avg']
Run Code Online (Sandbox Code Playgroud)
到
temp['total'] = pd.DataFrame(project_data.groupby(col1)[col2].agg(total='count')).reset_index()['total']
temp['Avg'] = pd.DataFrame(project_data.groupby(col1)[col2].agg(Avg='mean')).reset_index()['Avg']
Run Code Online (Sandbox Code Playgroud)
原因:在新的 Pandas 版本中,命名聚合是已弃用的“dict-of-dicts”方法的推荐替代品,用于命名列特定聚合的输出(重命名时使用字典弃用 groupby.agg())。
来源:https : //pandas.pydata.org/pandas-docs/stable/whatsnew/v0.25.0.html
tso*_*orn 46
如果数据框中不存在聚合函数 dict 中指定的列,也会发生此错误:
In [190]: group = pd.DataFrame([[1, 2]], columns=['A', 'B']).groupby('A')
In [195]: group.agg({'B': 'mean'})
Out[195]:
B
A
1 2
In [196]: group.agg({'B': 'mean', 'non-existing-column': 'mean'})
...
SpecificationError: nested renamer is not supported
Run Code Online (Sandbox Code Playgroud)
小智 6
我找到了方法:而不是像
g2 = df.groupby(["Description","CustomerID"],as_index=False).agg({'Quantity':{"maxQ":np.max,"minQ":np.min,"meanQ":np.mean}})
g2.columns = ["Description","CustomerID","maxQ","minQ",'meanQ']
Run Code Online (Sandbox Code Playgroud)
执行如下操作:
g2 = df.groupby(["Description","CustomerID"],as_index=False).agg({'Quantity':{np.max,np.min,np.mean}})
g2.columns = ["Description","CustomerID","maxQ","minQ",'meanQ']
Run Code Online (Sandbox Code Playgroud)
我有同样的错误,这就是我解决它的方法!