对该主题的部分回答:
- 对类声明做了一些修改(定义了 `__eq__` 和 `__hash__`),以便能够判断两条记录是否相同、从而识别唯一记录
class Clients:
    """A single measurement record for a client.

    Two records are equal when every field matches, and the hash is built
    from the same fields, so instances can be stored in sets and used as
    dict keys to detect duplicate measurements.

    Args:
        city: City recorded for this measurement.
        first_name: Client's first name.
        last_name: Client's last name.
        gender: Client's gender as recorded for this measurement.
        age: Client's age as recorded for this measurement.
        weight: Client's weight as recorded for this measurement.
        uuid: Identifier of the client this record belongs to.
    """

    def __init__(self, city, first_name, last_name, gender, age, weight, uuid):
        self.City = city
        self.FirstName = first_name
        self.LastName = last_name
        self.Gender = gender
        self.Age = age
        self.Weight = weight
        self.UUID = uuid

    def _key(self):
        """Return the tuple of fields that defines equality and hashing."""
        return (
            self.City,
            self.FirstName,
            self.LastName,
            self.Gender,
            self.Age,
            self.Weight,
            self.UUID,
        )

    def __eq__(self, other):
        """Return True when *other* is a Clients record with identical fields."""
        # Returning NotImplemented (instead of touching other.City) lets
        # Python fall back to its default comparison for foreign types
        # rather than raising AttributeError.
        if not isinstance(other, Clients):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        """Hash over the same fields used by __eq__ so equal records collide."""
        return hash(self._key())

    def __repr__(self):
        """Return a debugging representation listing every field."""
        return (
            f"{type(self).__name__}(city={self.City!r}, "
            f"first_name={self.FirstName!r}, last_name={self.LastName!r}, "
            f"gender={self.Gender!r}, age={self.Age!r}, "
            f"weight={self.Weight!r}, uuid={self.UUID!r})"
        )
- 假设您有一个包含不同条目的简单列表,例如:
import uuid
# The first record introduces the client and mints the UUID that every later
# measurement of the same person reuses.
list_of_measurements = [Clients("Spamburg", "Foo", "Bar", "Apache", 31, 80, uuid.uuid4())]
# Follow-up records: only the UUID ties them back to the first client.
list_of_measurements += [
    Clients("Spamburg", "Foo", "Bar", "Apache", 31, 82, list_of_measurements[0].UUID),  # new weight measurement
    Clients("Spamburg", "Foo", "Bar", "Apache", 32, 83, list_of_measurements[0].UUID),  # new weight measurement, age changed
    Clients("Spamville", "Foo", "Bar", "Apache", 33, 83, list_of_measurements[0].UUID),  # new weight measurement, client moved
    Clients("Spamville", "Foo", "Bar", "Apache", 33, 83, list_of_measurements[0].UUID),  # duplicate measurement
    Clients("Spamville", "Foo", "Bar", "Hind", 33, 83, list_of_measurements[0].UUID),  # client underwent gender change
    Clients("Spamville", "Foo", "ZanziBar", "Hind", 33, 83, list_of_measurements[0].UUID),  # client got married and changed their last name
    Clients("Spamville", "Foo Too", "Bar None", "Hind", 23, 63, uuid.uuid4()),  # second unique client
    Clients("Spamville", "Foo Three", "Bar Some", "Apache", 18, 60, uuid.uuid4()),  # third unique client
]
请注意,对于同一位客户,除 UUID 之外的所有字段实际上都可能发生变化。
- 查找唯一客户数量 - 可以通过使用集合获得:
# A set comprehension collapses repeated UUIDs directly (no throwaway list),
# so each client appears exactly once.
set_of_clients = {c.UUID for c in list_of_measurements}
- 报告有多个体重测量值的客户 - 可以使用 dict 和 sum 完成:
# Tally measurements per client: each comparison yields a bool, and summing
# the bools counts how many records carry that UUID.
measurements_dict = {
    uid: sum(c.UUID == uid for c in list_of_measurements)
    for uid in set_of_clients
}
# Keep only the clients that were measured more than once.
list_of_customers_with_more_than_one_measurement = [
    uid for uid in measurements_dict if measurements_dict[uid] > 1
]
print(list_of_customers_with_more_than_one_measurement)
- 删除可能重复的测量值 - 可以先转换为集合(set)再转换回列表来完成,这依赖于前面类中定义的 `__eq__` 和 `__hash__` 方法
# Passing the records through a set drops exact duplicates (per the records'
# __eq__/__hash__); unpacking the set yields a plain list again. The resulting
# order is whatever the set iteration produces.
list_of_deduplicated_measurements = [*set(list_of_measurements)]
- 平均年龄 - 假设您只想对每位客户最后记录的年龄求平均(这里取每位客户所有记录中的最大年龄作为其最后记录的年龄)
from statistics import mean
# Track the highest age seen per client in one pass over the measurements
# instead of rescanning the whole list for every client. The highest age
# stands in for the most recently recorded one.
ages_dict = {}
for measurement in list_of_measurements:
    client_id = measurement.UUID
    ages_dict[client_id] = max(measurement.Age, ages_dict.get(client_id, measurement.Age))
# NOTE: mean() raises statistics.StatisticsError if there are no measurements.
average_age = mean(ages_dict.values())