Sum Values In A List Of Lists Of Dictionaries Using Common Key-value Pairs
How do I sum duplicate elements in a list of lists of dictionaries? Sample list: data = [ [ {'user': 1, 'rating': 0}, {'user': 2, 'rating': 10},
Solution 1:
With pandas:
# For each inner list, build a DataFrame, group its rows by 'user' and sum
# the remaining column. as_index=False keeps 'user' as a regular column and
# sort=False keeps the groups in order of first appearance (see the output
# below). to_dict(orient='records') turns each result back into a list of
# dicts.
>>> import pandas as pd
>>> [pd.DataFrame(dicts).groupby('user', as_index=False, sort=False).sum().to_dict(orient='records') for dicts in data]
[[{'user': 1, 'rating': 20},
{'user': 2, 'rating': 10},
{'user': 3, 'rating': 10}],
[{'user': 4, 'rating': 4},
{'user': 2, 'rating': 80},
{'user': 1, 'rating': 30}]]
Solution 2:
You can try:
from itertools import groupby

# Aggregate every inner list of `data` independently.
result = []
for lst in data:
    # groupby() only merges *adjacent* items, so order the entries by user
    # before grouping.
    by_user = sorted(lst, key=lambda entry: entry['user'])
    totals = []
    for name, entries in groupby(by_user, key=lambda entry: entry['user']):
        rating_sum = sum(entry['rating'] for entry in entries)
        totals.append({'user': name, 'rating': rating_sum})
    result.append(totals)

# Sort each aggregated sublist by ascending rating. The sort is stable, so
# users with equal totals stay in ascending user order.
result = [sorted(sub, key=lambda d: d['rating']) for sub in result]
# %%timeit# 7.54 µs ± 220 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
UPDATE (A more efficient solution):
# Single pass per inner list: accumulate ratings in a dict keyed by user.
result = []
for lst in data:
    # Maps user -> aggregated record; dicts keep insertion order.
    visited = {}
    for d in lst:
        user = d['user']
        if user in visited:
            visited[user]['rating'] += d['rating']
        else:
            # Store a copy: accumulating into the original dict would
            # silently change the ratings held in `data`.
            visited[user] = dict(d)
    # Ascending rating; the stable sort keeps first-seen order on ties.
    result.append(sorted(visited.values(), key=lambda d: d['rating']))
# %% timeit# 2.5 µs ± 54 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Result:
# print(result)
[
[
{'user': 2, 'rating': 10},
{'user': 3, 'rating': 10},
{'user': 1, 'rating': 20}
],
[
{'user': 4, 'rating': 4},
{'user': 1, 'rating': 30},
{'user': 2, 'rating': 80}
]
]
Solution 3:
# Build one list of per-user totals for every inner list of `data`.
op = []
for lst in data:
    # user -> running rating total, in order of first appearance.
    totals = {}
    for entry in lst:
        who = entry['user']
        score = entry['rating']
        totals[who] = totals.get(who, 0) + score
    # Convert the totals back into the original record shape.
    merged = []
    for who, score in totals.items():
        merged.append({'user': who, 'rating': score})
    op.append(merged)
N.B.: Since Python 3.7, dictionaries officially preserve insertion order, so each user appears in the output in order of first occurrence.
Solution 4:
This should work:
from collections import defaultdict

# Collapse duplicate users inside each inner list of `data` by summing
# their ratings.
data_without_duplicates = []
for group in data:
    # Missing users start at 0, so the first rating is simply added to it.
    totals = defaultdict(int)
    for entry in group:
        totals[entry["user"]] += entry["rating"]
    merged = []
    for user, rating in totals.items():
        merged.append({"user": user, "rating": rating})
    data_without_duplicates.append(merged)
Solution 5:
import pprint

# Sample input: a list of lists of {'user', 'rating'} records.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10}
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10}
    ],
]


def find(user, l):
    """Return the index of the first dict in *l* whose 'user' equals *user*.

    Returns -1 when no entry matches.
    """
    for i, d in enumerate(l):
        if user == d['user']:
            return i
    return -1


# For each inner list, merge records that share a user by summing ratings.
data_sum = []
for l in data:
    list_sum = []
    for d in l:
        idx = find(d['user'], list_sum)
        if idx == -1:
            # Append a copy so the accumulation below never modifies the
            # dictionaries stored in `data`.
            list_sum.append(dict(d))
        else:
            list_sum[idx]['rating'] += d['rating']
    data_sum.append(list_sum)

pprint.pprint(data_sum)
Post a Comment for "Sum Values In A List Of Lists Of Dictionaries Using Common Key-value Pairs"