Skip to content Skip to sidebar Skip to footer

Perform Pandas Aggregation While Keeping The Date Column Intact

user = {'id':['abab23', 'abab21', 'abab22', 'abab25', 'abab24', 'abab30', 'abab252', 'abab15'], 'dob':['10-10-1990','1-12-1993', '12-12-2000', '2-10-1999', '2-10-1999', '2-

Solution 1:

Use:

# Reshape the wide activities table into one row per event (type, date, id),
# keeping only ids that also appear in the user table.
# NOTE(review): assumes `activities` and `user` are already pandas DataFrames
# built from the question's data - confirm before running.

# select only the necessary columns
activities = activities[['sentconn','receiveconn','sentdate','receivedDate']]

# set new column names following a <type>_<field> pattern
activities.columns = ['sent_id','receive_id','sent_date','receive_date']

# split the column names by _ into a MultiIndex: level 0 = type, level 1 = field
activities.columns = activities.columns.str.split('_', expand=True)

# reshape the DataFrame (move the type level into the rows) and filter by ids
# present in user with an inner merge on the shared 'id' column
activities = (activities.stack(0)
                        .rename_axis([None, 'type'])
                        .reset_index(level=1)
                        .merge(user['id']))
print (activities)
      type        date       id
0  receive   2-10-2020   abab24
1  receive   2-10-2020   abab24
2     sent   2-10-2020   abab15
3     sent  11-10-2020   abab15
4  receive   4-10-2020   abab21
5     sent   4-10-2020   abab25
6     sent   5-10-2020   abab23
7  receive  10-10-2020  abab252
8     sent  10-10-2020   abab22
9  receive  11-10-2020   abab30

# get counts by crosstab: one row per (date, id) pair, one column per type
df = pd.crosstab([activities['date'], activities['id']], activities['type'])
print (df)
type                receive  sent
date       id                    
10-10-2020 abab22         0     1
           abab252        1     0
11-10-2020 abab15         0     1
           abab30         1     0
2-10-2020  abab15         0     1
           abab24         2     0
4-10-2020  abab21         1     0
           abab25         0     1
5-10-2020  abab23         0     1

Solution 2:

Try this:

# Sample data: each position across the four lists describes one connection
# event (who sent it, who received it, and the two dates).
activities = {
    'sentconn': ['abab35', 'abab15', 'abab25', 'abab23', 'abab22', 'abab15'],
    'receiveconn': ['abab24', 'abab24', 'abab21', 'abab35', 'abab252', 'abab30'],
    'sentdate': ['2-10-2020', '2-10-2020', '4-10-2020', '5-10-2020', '10-10-2020', '11-10-2020'],
    'receivedDate': ['2-10-2020', '2-10-2020', '4-10-2020', '5-10-2020', '10-10-2020', '11-10-2020'],
}

user = {
    'id': ['abab23', 'abab21', 'abab22', 'abab25', 'abab24', 'abab30', 'abab252', 'abab15'],
    'dob': ['10-10-1990', '1-12-1993', '12-12-2000', '2-10-1999', '2-10-1999', '2-10-1999', '2-10-1999', '2-10-1999'],
}

usr_df = pd.DataFrame(user)
df = pd.DataFrame(activities)

# Count events per (date, user) pair - once from the sender side and once from
# the receiver side - and give both results the same index level names so the
# two tables line up when they are joined.
df1 = (
    df.groupby(['sentdate', 'sentconn'])
      .agg({'sentconn': 'count'})
      .rename_axis(['date', 'user'])
)
df2 = (
    df.groupby(['receivedDate', 'receiveconn'])
      .agg({'receiveconn': 'count'})
      .rename_axis(['date', 'user'])
)

# Put the two count columns side by side; a pair that exists on only one side
# gets 0 in the other column instead of NaN.
combined = pd.concat([df1, df2], axis=1).fillna(0).reset_index()

# Drop rows whose user id is not listed in usr_df, then restore the
# (date, user) index for display.
known_user = combined['user'].isin(usr_df['id'])
df = combined.loc[known_user].set_index(['date', 'user'])
print(df)

outputs:

                    sentconn  receiveconn
date       user                          
10-10-2020 abab22        1.0          0.0
           abab252       0.0          1.0
11-10-2020 abab15        1.0          0.0
           abab30        0.0          1.0
2-10-2020  abab15        1.0          0.0
           abab24        0.0          2.0
4-10-2020  abab21        0.0          1.0
           abab25        1.0          0.0
5-10-2020  abab23        1.0          0.0

Post a Comment for "Perform Pandas Aggregation While Keeping The Date Column Intact"