groupby: 分组
melt: 宽表转长表
pivot_table: 长表转宽表,数据透视表
crosstab: 交叉表 / 列联表,主要用于分组频数统计
import numpy as np import pandas as pd df = pd.DataFrame({'key1':['a','a','b','b','a'], 'key2':['one','two','one','two','one'], 'data1':np.random.randn(5), 'data2':np.random.randn(5) }) # key1 key2 data1 data2 #0 a one 0.498857 0.074495 #1 a two 2.872086 -1.303973 #2 b one -0.721171 -1.315390 #3 b two 0.985844 0.419780 #4 a one -0.134534 -0.732861 # #################### '''groupby 用法''' group1 = df.groupby('key1') group2 = df.groupby(['key1','key2']) [x for x in group1] group1.size() group1.sum() group2.count() group1['data1','data2'].agg(['mean','sum']) #作用于所有列 group2(['key1','key2']).apply(lambda x: pd.Series([x.shape[0], x['key1'].mean(), x['key2'].sum()], index=['counts', 'key1_mean', 'key2_sum'])) #作用于指定列 # #################### ''' melt 用法 -- 宽表转长表 ''' pd.melt(df, id_vars=['key1', 'key2'], value_vars=['data1', 'data2'], var_name='var', value_name='value') #col_level # key1 key2 var value #0 a one data1 0.498857 #1 a two data1 2.872086 #2 b one data1 -0.721171 #3 b two data1 0.985844 #4 a one data1 -0.134534 #5 a one data2 0.074495 #6 a two data2 -1.303973 #7 b one data2 -1.315390 #8 b two data2 0.419780 #9 a one data2 -0.732861 # #################### ''' crosstab 用法 -- 列联表(count) ''' pd.crosstab(df.key1, df.key2, margins=True) #key2 one two All #key1 #a 2 1 3 #b 1 1 2 #All 3 2 5 # #################### ''' pivot_table 用法 -- 长表转宽表 ''' # pd.pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, # dropna=True, margins=False, margins_name='ALL') #aggfunc={'d':np.sum, 'e':np.max} pd.pivot_table(df, index='key1', columns='key2') # data1 data2 #key2 one two one two #key1 #a 0.182162 2.872086 -0.329183 -1.303973 #b -0.721171 0.985844 -1.315390 0.419780 df.pivot_table(['data1'], index='key1', columns='key2', fill_value=0) #['data1'] # data1 #key2 one two #key1 #a 0.182162 2.872086 #b -0.721171 0.985844
参考链接: