您可以编写如下所示的通用函数(思路来自我之前的另一个回答):
def stack_multiple(data, cols=None, output_columns=("col", "values")):
    """Unpivot columns of a DataFrame into (column name, value) rows.

    Builds a Spark SQL ``stack(n, "name1", `name1`, "name2", `name2`, ...)``
    expression and evaluates it with ``data.selectExpr``.

    Args:
        data: Object exposing ``columns`` and ``selectExpr`` (a Spark
            DataFrame in the sample usage).
        cols: Column name, or iterable of column names, to stack.
            Defaults to every column in ``data.columns``.
        output_columns: Names given to the generated columns, joined
            verbatim into the ``as (...)`` alias list.

    Returns:
        Whatever ``data.selectExpr`` returns for the generated expression.

    Raises:
        ValueError: If there are no columns to stack.
    """
    if cols is None:
        cols = data.columns
    elif isinstance(cols, str):
        cols = [cols]
    # Materialise once so one-shot iterables can be both counted and walked.
    cols = list(cols)
    if not cols:
        # Fail early: "stack(0,)" is not a valid expression.
        raise ValueError("stack_multiple requires at least one column to stack")

    def _label(name):
        # String literal holding the column name; escape backslashes and
        # double quotes so they cannot terminate the literal early.
        escaped = str(name).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _identifier(name):
        # Backtick-quoted column reference; an embedded backtick is
        # escaped by doubling it.
        return "`" + str(name).replace("`", "``") + "`"

    pairs = ",".join(f"{_label(c)},{_identifier(c)}" for c in cols)
    aliases = ",".join(output_columns)
    # NOTE(review): stack() presumably needs the stacked columns to share a
    # compatible type; cast beforehand if they differ - confirm against Spark.
    return data.selectExpr(f"stack({len(cols)},{pairs}) as ({aliases})")
运行示例:
stack_multiple(df).show()
+---+------+
|col|values|
+---+------+
|  A|     1|
|  B|     2|
|  C|     3|
|  D|     4|
|  A|    10|
|  B|    11|
|  C|    12|
|  D|    13|
+---+------+
stack_multiple(df,['A','B'],output_columns=['A','B']).show()
+---+---+
|  A|  B|
+---+---+
|  A|  1|
|  B|  2|
|  A| 10|
|  B| 11|
+---+---+