Skip to content Skip to sidebar Skip to footer

Python Interaction Between Columns And Rows

I have the following dataframe: topic student level week 1 a 1 1 1 b 2 1 1 a 3 1 2 a

Solution 1:

Full, tested answer. Sorry for the previous version, which contained many typos.

import pandas as pd
from itertools import permutations

# Sample data: one row per message. The loop below treats level 1 as a
# topic's opening message and level 2 as the first reply to it.
dataframe = {
    "topic": [1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3],
    "student": ["a", "b", "a", "a", "b", "a", "b", "c", "b", "c", "a", "b"],
    "level": [1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5],
    "week": [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2],
}
dataframe = pd.DataFrame.from_dict(dataframe)
# `reindex_axis` was removed in pandas 1.0; `reindex(columns=...)` fixes the
# column order the same way.
dataframe = dataframe.reindex(columns=["topic", "student", "level", "week"])


def _count_topic_pairs(results, topic_users, week):
    """Add one to the topic count of every ordered pair of users in a topic.

    ``results`` is updated in place; keys are "source-destination-week"
    strings and values are ``[topic_counts, reply_counts]`` lists.
    """
    for pair in permutations(topic_users, 2):
        key = "-".join(pair) + "-" + str(week)
        results.setdefault(key, [0, 0])[0] += 1


results = {}  # "source-destination-week" -> [topic_counts, reply_counts]
source = False  # author of the opening message while it awaits its reply
# `get_value` was removed in pandas 1.0; `.at` is the scalar accessor.
prev_topic = dataframe.at[0, "topic"]  # topic of the previous row
topic_users = set()  # users seen so far in the current topic
week = None  # week of the current topic's opening message

# `get_values` was removed in pandas 1.0; itertuples yields one plain tuple
# per row in column order (topic, student, level, week).
for topic, student, level, row_week in dataframe.itertuples(index=False, name=None):
    if topic == prev_topic:
        if level == 1:
            # Opening message: remember its author and the topic's week.
            source = student
            topic_users.add(source)
            week = row_week
        elif level == 2 and source:
            # First reply directly after an opening message.
            destination = student
            topic_users.add(destination)
            if week != row_week:
                print("ERROR: Topic " + str(topic) + " extends on several weeks")

            key = "-".join((source, destination, str(week)))
            if key not in results:
                results[key] = [0, 0]
            results[key][1] += 1
            source = False  # the opening message has been answered
        else:
            # Any later message only registers its author in the topic.
            topic_users.add(student)
            if week != row_week:
                print("ERROR: Topic " + str(topic) + " extends on several weeks")
            source = False
    else:
        # Topic changed: credit the finished topic to its user pairs, using
        # the week recorded for that topic, then start the new one.
        _count_topic_pairs(results, topic_users, week)
        topic_users = set()
        source = False  # never carry a pending source into another topic
        if level == 1:
            source = student
            topic_users.add(source)
            week = row_week

    prev_topic = topic

# The last topic never triggers a topic change, so count its pairs here.
if topic_users:
    _count_topic_pairs(results, topic_users, week)

# Unpack the "source-destination-week" keys into one column per component.
dico = {'source': [], 'destination': [], 'week': [], 'topic': [], 'reply': []}
for k, v in results.items():
    print(k, v)
    s, d, w = k.split('-')
    dico['source'].append(s)
    dico['destination'].append(d)
    dico['week'].append(w)
    dico['topic'].append(v[0])
    dico['reply'].append(v[1])

df = pd.DataFrame.from_dict(dico)
df = df.reindex(columns=["source", "destination", "week", "topic", "reply"])
print(df)

Solution 2:

My suggestion:

I would use a dictionary containing 'source-destination-week' as keys and (total_topics, reply_counts) as values.

Loop over the first dataframe: for each question, store who posted the 1st message as the destination, store who posted the 2nd message as the source, store the week as week, and add a counter in the dictionary at key 'source-destination-week'. I note that you no longer need to display student pairs with no interaction, so I removed that part. E.g.:

from itertools import permutations

def _count_topic_pairs(results, topic_users, week):
    """Add one to the topic count of every ordered pair of users in a topic.

    ``results`` is updated in place; keys are "source-destination-week"
    strings and values are ``[topic_counts, reply_counts]`` lists.
    """
    for pair in permutations(topic_users, 2):
        # str(week): joining an integer week directly raises TypeError.
        key = "-".join(pair) + "-" + str(week)
        results.setdefault(key, [0, 0])[0] += 1


results = {}  # "source-destination-week" -> [topic_counts, reply_counts]
source = False  # author of the opening message while it awaits its reply
prev_topic = None  # topic of the previous row, used to detect a topic change
topic_users = set()  # users seen so far in the current topic
week = None  # week associated with the current topic

# Iterating a DataFrame directly yields column labels, not rows. Select the
# columns by name and iterate plain per-row tuples instead.
rows = dataframe[["topic", "student", "level", "week"]].itertuples(index=False, name=None)
for topic, student, level, row_week in rows:
    if topic != prev_topic:
        # Topic changed (or this is the very first row): credit the finished
        # topic to its user pairs, then reset the per-topic state. With an
        # empty user set the call does nothing.
        _count_topic_pairs(results, topic_users, week)
        topic_users = set()
        source = False
        week = row_week

    if level == 1:
        # Opening message: remember its author and the topic's week.
        source = student
        topic_users.add(source)
        week = row_week
    elif level == 2 and source:
        # First reply directly after an opening message.
        destination = student
        topic_users.add(destination)
        if week != row_week:
            print("ERROR: Topic " + str(topic) + " extends on several weeks")

        key = "-".join((source, destination, str(week)))
        if key not in results:
            results[key] = [0, 0]
        results[key][1] += 1
        source = False  # the opening message has been answered
    else:
        # Any later message only registers its author in the topic.
        topic_users.add(student)
        if week != row_week:
            print("ERROR: Topic " + str(topic) + " extends on several weeks")
        source = False

    prev_topic = topic

# The last topic never triggers a topic change, so count its pairs here.
if topic_users:
    _count_topic_pairs(results, topic_users, week)

then you can convert your dictionary back into a dataframe. eg:

# Example results keyed by "source-destination" -> [topic_counts, reply_counts].
dico = {'b-a': [0, 1], 'b-c': [1, 1], 'a-b': [2, 1]}
# orient='index' makes each key a row label and each list a row.
df = pd.DataFrame.from_dict(dico, orient='index')
# rename() returns a new frame, so assign it back. The index mapper must be a
# callable or dict: the builtin `str`, not the string "str".
df = df.rename(index=str, columns={0: 'topic', 1: 'reply'})

I hope I didn't make any typo in the code, couldn't test it yet... At your disposal for any question :)

Post a Comment for "Python Interaction Between Columns And Rows"