Comment additionner les éléments en double dans une liste de listes de dictionnaires?
Exemple de liste:
# Sample input: a list of groups, each group being a list of
# {'user': ..., 'rating': ...} records. A user may appear several times
# inside the same group.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]
Sortie attendue :
# Expected result: within each group, records sharing the same 'user' are
# merged into a single record whose 'rating' is the sum of their ratings.
op = [
    [
        {'user': 1, 'rating': 20},
        {'user': 2, 'rating': 10},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 30},
    ],
]
# Build, for every group in `data`, a list with one record per distinct user
# whose 'rating' is the sum of that user's ratings in the group.
op = []
for group in data:
    # user -> running total; a plain dict keeps first-seen order of the users.
    totals = {}
    for entry in group:
        uid = entry['user']
        totals[uid] = totals.get(uid, 0) + entry['rating']
    merged = []
    for uid, total in totals.items():
        merged.append({'user': uid, 'rating': total})
    op.append(merged)
N.B. : depuis Python 3.7, les dictionnaires conservent officiellement l'ordre d'insertion.
Cela devrait fonctionner:
from collections import defaultdict
# For each group in `data`, sum the ratings per user and emit one record per
# distinct user, in the order the users were first seen.
data_without_duplicates = []
for group in data:
    # Missing users start at 0 thanks to defaultdict(int).
    totals = defaultdict(int)
    for entry in group:
        totals[entry["user"]] += entry["rating"]
    deduplicated = [
        {"user": uid, "rating": total} for uid, total in totals.items()
    ]
    data_without_duplicates.append(deduplicated)
Compréhension de liste Python:
from collections import Counter
def _sum_ratings(group):
    """Return a Counter mapping each user of *group* to its summed rating.

    Accumulating with ``+=`` is what merges duplicates: building the Counter
    from a dict comprehension would keep only the last rating seen for a user.
    """
    totals = Counter()
    for entry in group:
        totals[entry['user']] += entry['rating']
    return totals


# One list per group in `data`, one record per distinct user.
# most_common() orders the records by descending total rating (ties keep
# first-seen order), not by first appearance in the group.
# For the sample data the second group yields 80 (user 2), 30 (user 1)
# and 4 (user 4).
x = [
    [
        {'user': user, 'rating': total}
        for user, total in _sum_ratings(group).most_common()
    ]
    for group in data
]
Sortie :
[
[
{
"rating": 20,
"user": 1
},
{
"rating": 10,
"user": 2
},
{
"rating": 10,
"user": 3
}
],
[
{
"rating": 80,
"user": 2
},
{
"rating": 10,
"user": 1
},
{
"rating": 4,
"user": 4
}
]
]
# Sample input: each inner list is one group of per-user records.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]

# Field that identifies duplicates; every other field of a record is summed.
keyname = "user"

# NOTE: this name shadows the builtin all(); it is kept unchanged so that
# code referring to the result by this name keeps working.
all = []
for row in data:
    # key value -> merged record. A dict lookup replaces the linear scan of
    # the output list, so each record is handled in constant time (key values
    # must be hashable). Dicts preserve insertion order, so values() returns
    # the merged records in first-seen order.
    merged = {}
    for d in row:
        key = d[keyname]
        target = merged.get(key)
        if target is None:
            target = merged[key] = {keyname: key}
        for k, v in d.items():
            if k != keyname:
                # Fields not seen yet for this key start from 0.
                target[k] = target.get(k, 0) + v
    all.append(list(merged.values()))
print(all)
donne:
[[{'user': 1, 'rating': 20}, {'user': 2, 'rating': 10}, {'user': 3, 'rating': 10}], [{'user': 4, 'rating': 4}, {'user': 2, 'rating': 80}, {'user': 1, 'rating': 30}]]
Le tri doit être évité car chaque élément peut être traité en un seul passage. Toute technique basée sur le hachage devrait être meilleure.
Voici une solution alternative, qui utilise un defaultdict au lieu d'un coûteux sort/groupby ou de pandas.
from collections import defaultdict
from functools import reduce
def reduce_func(state, item):
    """Fold one record into the per-user running totals.

    Args:
        state: Mapping from user to the record accumulated so far. Looking up
            ``state[user]["rating"]`` must succeed, so either the user is
            already present or the mapping supplies a default (for example a
            defaultdict whose factory returns ``{"rating": 0}``).
        item: Record with ``"user"`` and ``"rating"`` keys.

    Returns:
        The same ``state`` object, updated in place, so the function can be
        used directly as the callable of ``functools.reduce``.
    """
    user = item["user"]
    new_obj = {
        "user": user,
        "rating": state[user]["rating"] + item["rating"],
    }
    state[user] = new_obj
    return state
# For every group in `data`, fold its records with reduce_func into a fresh
# defaultdict (unknown users start from a rating of 0) and keep the
# accumulated records as a list.
output = [
    [*reduce(reduce_func, group, defaultdict(lambda: {"rating": 0})).values()]
    for group in data
]
import pprint
# Sample input: two groups of {'user': ..., 'rating': ...} records, with
# user 1 appearing twice in each group.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]
def find(user, l):
    """Return the index of the first record in *l* whose 'user' equals *user*.

    Returns -1 when no record matches (including when *l* is empty).
    """
    positions = (pos for pos, entry in enumerate(l) if user == entry['user'])
    return next(positions, -1)
# For each group in `data`, merge records that share the same user by summing
# their ratings; the merged records keep first-seen order.
data_sum = []
for group in data:
    list_sum = []
    for d in group:
        idx = find(d['user'], list_sum)
        if idx == -1:
            # Store a copy: appending `d` itself would make the `+=` below
            # modify the caller's input records in place.
            list_sum.append(dict(d))
        else:
            list_sum[idx]['rating'] += d['rating']
    data_sum.append(list_sum)
pprint.pprint(data_sum)