Python Interaction Between Columns And Rows
I have the following dataframe, with columns topic, student, level and week (excerpt, truncated in the source): (1, a, 1, 1), (1, b, 2, 1), (1, a, 3, 1), (2, a, …)
Solution 1:
Full, tested answer. Sorry for the previous version; it contained many typos.
import pandas as pd
from itertools import permutations
# Marker meaning "no row has been read yet"; a private object so that it can
# never be confused with a real topic value.
_NO_TOPIC = object()


def _add_topic_counts(results, topic_users, week):
    """Credit one shared topic to every ordered pair of users of a finished topic."""
    # sorted() makes the insertion order of new keys reproducible; plain set
    # iteration order varies between runs for strings.
    for pair in permutations(sorted(topic_users), 2):
        key = "-".join(pair) + "-" + str(week)
        results.setdefault(key, [0, 0])[0] += 1


def count_interactions(frame):
    """Count shared topics and direct replies between students.

    Rows are read in order and rows of one topic are expected to be
    contiguous. Within a topic, the author of a level-1 message is the
    "source"; if the next message has level 2, its author is the
    "destination" and one reply is counted for that pair. When a topic ends,
    every ordered pair of distinct students who posted in it gets one topic
    count.

    Args:
        frame: DataFrame with the columns "topic", "student", "level" and
            "week". Student names must be strings (they are joined with "-").

    Returns:
        dict mapping "source-destination-week" to
        [topic_count, reply_count]. Empty when ``frame`` has no rows.

    Raises:
        KeyError: if one of the four columns is missing.
    """
    results = {}
    topic_users = set()  # students seen so far in the current topic
    prev_topic = _NO_TOPIC
    source = None  # author of a level-1 message still waiting for a reply
    week = None  # week attributed to the current topic

    rows = frame[["topic", "student", "level", "week"]].itertuples(
        index=False, name=None
    )
    for topic, student, level, row_week in rows:
        if prev_topic is _NO_TOPIC or topic != prev_topic:
            # Topic change: flush the finished topic, then start a fresh one.
            # The row itself is handled below like any other row, so a topic
            # that does not start with a level-1 message still records its
            # first user.
            _add_topic_counts(results, topic_users, week)
            topic_users = set()
            source = None
            week = row_week
            prev_topic = topic

        topic_users.add(student)

        if level == 1:
            # Initial message: remember its author and the topic's week.
            source = student
            week = row_week
            continue

        if row_week != week:
            print("ERROR: Topic " + str(topic) + " extends on several weeks")

        if level == 2 and source is not None:
            # Direct reply to the initial message.
            key = "-".join((source, student, str(week)))
            results.setdefault(key, [0, 0])[1] += 1

        # Only the message immediately following a level-1 message can count
        # as a reply to it.
        source = None

    # The last topic is never followed by a topic change, so flush it here.
    _add_topic_counts(results, topic_users, week)
    return results


def results_to_frame(results):
    """Convert the ``count_interactions`` dictionary into a DataFrame.

    Args:
        results: dict mapping "source-destination-week" to
            [topic_count, reply_count].

    Returns:
        DataFrame with the columns source, destination, week, topic, reply.
        ``week`` holds strings because it is recovered by splitting the key.

    Raises:
        ValueError: if a key does not split into exactly three "-"-separated
            parts (for example when a student name contains "-").
    """
    columns = ["source", "destination", "week", "topic", "reply"]
    records = []
    for key, (topic_count, reply_count) in results.items():
        source, destination, week = key.split("-")
        records.append((source, destination, week, topic_count, reply_count))
    return pd.DataFrame(records, columns=columns)


# Sample data; passing `columns` fixes the column order directly, which
# replaces the removed DataFrame.reindex_axis call.
dataframe = pd.DataFrame(
    {
        "topic": [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3],
        "student": ["a", "b", "a", "a", "b", "a", "b", "c", "b", "c", "a", "b"],
        "level": [1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5],
        "week": [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    },
    columns=["topic", "student", "level", "week"],
)

results = count_interactions(dataframe)
for key, counts in results.items():
    print(key, counts)

df = results_to_frame(results)
print(df)
Solution 2:
My suggestion:
I would use a dictionary containing 'source-destination-week' as keys and (total_topics, reply_counts) as values.
Loop over the first dataframe and, for each question, store who posted the 1st message as the destination, store who posted the 2nd message as the source, store the week as week, and add a counter in the dictionary at key 'source-destination-week'. I note that you no longer need to display student pairs with no interaction, so I removed that. E.g.:
from itertools import permutations
# Count, for every ordered pair of students and week, how many topics they
# shared and how many direct replies (a level-2 message right after a level-1
# message) went from the first poster to the second.
results = {}  # "source-destination-week" -> [topic_count, reply_count]
source = None  # author of a level-1 message still waiting for a reply
prev_topic = None  # topic of the previous row, used to detect a topic change
topic_users = set()  # students seen so far in the current topic
week = None  # week attributed to the current topic

# NOTE(review): assumes `dataframe` is a pandas DataFrame whose first four
# columns are topic, student, level, week, with rows of one topic contiguous
# and student names stored as strings -- confirm against the question's data.
# Iterating a DataFrame directly yields column labels, so itertuples() is
# used to walk the rows.
for row in dataframe.itertuples(index=False, name=None):
    topic, student, level, row_week = row[:4]

    if topic != prev_topic:
        # Topic change: credit one shared topic to every ordered pair of
        # users of the finished topic (nothing happens before the first row,
        # because topic_users is still empty).
        for pair in permutations(sorted(topic_users), 2):
            key = "-".join(pair) + "-" + str(week)
            results.setdefault(key, [0, 0])[0] += 1
        topic_users = set()
        source = None
        week = row_week
        prev_topic = topic

    # The first row of a topic is processed like any other row; skipping it
    # would mean the initial message is never seen and no reply is counted.
    topic_users.add(student)

    if level == 1:
        # Initial message: remember its author and the topic's week.
        source = student
        week = row_week
        continue

    if row_week != week:
        print("ERROR: Topic " + str(topic) + " extends on several weeks")

    if level == 2 and source is not None:
        # Direct reply to the initial message; str(week) because join()
        # only accepts strings.
        key = "-".join((source, student, str(week)))
        results.setdefault(key, [0, 0])[1] += 1

    # Only the message immediately following a level-1 message can count as
    # a reply to it.
    source = None

# Redo the topic-count feeding for the last topic, for which no topic change
# is ever detected.
for pair in permutations(sorted(topic_users), 2):
    key = "-".join(pair) + "-" + str(week)
    results.setdefault(key, [0, 0])[0] += 1
Then you can convert your dictionary back into a dataframe, e.g.:
# Example result dictionary: "source-destination" -> [topic_count, reply_count].
dico = {'b-a': [0, 1], 'b-c': [1, 1], 'a-b': [2, 1]}
# One row per key. Naming the columns here replaces the former
# `df.rename(...)` call, whose return value was discarded (rename is not
# in-place) and which passed the string "str" instead of the callable `str`.
df = pd.DataFrame.from_dict(dico, orient='index', columns=['topic', 'reply'])
I hope I didn't make any typos in the code; I couldn't test it yet. I am at your disposal for any questions :)
Post a Comment for "Python Interaction Between Columns And Rows"