我认为无法直接用 get_dummies() 做到这一点。不过,事后再对结果重新排列如何?如果我对问题的理解正确,您是想对 one-hot 编码后的数据列重新排序,使其与指定的顺序一致。
# Column order that the encoded matrix must follow.
categories = ["A", "B", "C"]
# One-hot encode the topic column, select the dummy columns in the
# prescribed order, and drop down to a plain NumPy array.
Y = pd.get_dummies(data["Article_Topic_1"]).loc[:, categories].values
下面这个函数会先检查该解决方案成立所依赖的一些前提假设。
def get_dummies_for_coding(series, ordering):
    """One-hot encode *series* and return the columns in a prescribed order.

    Args:
        series: A ``pd.Series`` of category labels. Only a Series is
            accepted because ``pd.get_dummies()`` adds string prefixes to
            the column names when given a DataFrame, which would make the
            lookup by label below more complicated.
        ordering: Iterable of labels giving the desired column order. Every
            label must be one of the dummy columns produced for *series*.

    Returns:
        A NumPy array with one row per element of *series* and one column
        per entry of *ordering*, in that order. The dtype is whatever
        ``pd.get_dummies()`` produces for the installed pandas version.

    Raises:
        TypeError: If *series* is not a ``pd.Series``.
        ValueError: If *ordering* contains a label that has no dummy column.
    """
    # Validate the type before touching any Series-specific API, and use
    # real exceptions rather than ``assert`` so the checks survive ``-O``.
    if not isinstance(series, pd.Series):
        raise TypeError(
            "series must be a pandas Series, got %s" % type(series).__name__
        )

    # Materialize once so a one-shot iterable can be both checked and used.
    ordering = list(ordering)

    dummies = pd.get_dummies(series)

    # Check against the columns that were actually generated; this is what
    # the selection below depends on.
    missing = [label for label in ordering if label not in dummies.columns]
    if missing:
        raise ValueError(
            "ordering contains values not present in series: %r" % (missing,)
        )

    return dummies[ordering].values
# Usage example: encode two columns of a small frame, each with its own
# explicitly chosen column order.
df = pd.DataFrame(
    {
        "colA": ["a", "a", "b", "a", "b"],
        "colB": ["foo", "bar", "bar", "baz", "bar"],
    },
    columns=["colA", "colB"],
)

# Desired dummy-column order for each source column.
orderingA = ["b", "a"]
orderingB = ["baz", "bar", "foo"]

# Encode colA and show the resulting matrix.
ret = get_dummies_for_coding(df["colA"], orderingA)
print(ret)

# Encode colB and show the resulting matrix.
ret = get_dummies_for_coding(df["colB"], orderingB)
print(ret)