您正在使用 dicts 的 dict 覆盖同一键的先前值。您可以改为使用 dict.setdefault 用列表初始化 dict 的新键的每个条目,以便您可以继续向其附加项目:
people = {}
interests = {}
for line in file:
employee_id, interest = line.split(maxsplit=1)
employee_id = int(employee_id)
interest = interest.rstrip()
people.setdefault(employee_id, []).append(interest)
interests.setdefault(interest, []).append(employee_id)
people 变为:
{0: ['Hadoop', 'Big Data', 'HBas', 'Java', 'Spark', 'Storm', 'Cassandra'], 1: ['NoSQL', 'MongoDB', 'Cassandra', 'HBase', 'Postgres'], 2: ['Python', 'skikit-learn', 'scipy', 'numpy', 'statsmodels', 'pandas'], 3: ['R', 'Python', 'statistics', 'regression', 'probability'], 4: ['machine learning', 'regression', 'decision trees', 'libsvm'], 5: ['Python', 'R', 'Java', 'C++', 'Haskell', 'programming languages'], 6: ['statistics', 'probability', 'mathematics', 'theory'], 7: ['machine learning', 'scikit-learn', 'Mahout', 'neural networks'], 8: ['neural networks', 'deep learning', 'Big Data', 'artificial intelligence'], 9: ['Hadoop', 'Java', 'MapReduce', 'Big Data']}
interests 变为:
{'Hadoop': [0, 9], 'Big Data': [0, 8, 9], 'HBas': [0], 'Java': [0, 5, 9], 'Spark': [0], 'Storm': [0], 'Cassandra': [0, 1], 'NoSQL': [1], 'MongoDB': [1], 'HBase': [1], 'Postgres': [1], 'Python': [2, 3, 5], 'skikit-learn': [2], 'scipy': [2], 'numpy': [2], 'statsmodels': [2], 'pandas': [2], 'R': [3, 5], 'statistics': [3, 6], 'regression': [3, 4], 'probability': [3, 6], 'machine learning': [4, 7], 'decision trees': [4], 'libsvm': [4], 'C++': [5], 'Haskell': [5], 'programming languages': [5], 'mathematics': [6], 'theory': [6], 'scikit-learn': [7], 'Mahout': [7], 'neural networks': [7, 8], 'deep learning': [8], 'artificial intelligence': [8], 'MapReduce': [9]}