不是每个人都可以使用 python 和 pandas(想想 dataAnalysts 和 BI dudes :))
这是标准 SQL@Bigquery 中的动态数据透视过程。
它目前还不做聚合。
因此(如果需要聚合)您首先要提供一个已经按 KPI 汇总好数值的表。
但它会自动创建一个表并生成所有透视列。
假设您的输入表 myDataset.myTable 如下所示:
LONG,LAT,KPI,US,EUR
A,1,temp,78,45
A,1,pressure,120,114
B,1,temp,12,8
B,1,pressure,85,52
如果您像这样调用以下程序:
CALL warehouse.pivot ('myDataset','myTable',['LONG','LAT'], 'KPI');
您将获得一个名为 myDataset.myTable_pivot 的新表,如下所示:
LONG,LAT,temp_US,temp_EUR,pressure_US, pressure_EUR
A,1,78,45,120,114
B,1,12,8,85,52
这里是代码:
CREATE OR REPLACE PROCEDURE warehouse.pivot (dataset STRING, table_to_pivot STRING, ls_pks ARRAY<STRING>, pivot_column STRING)
BEGIN
/*
  Dynamic pivot: creates (or replaces) the table <dataset>.<table_to_pivot>_pivot.

  Parameters:
    dataset        - dataset holding the source table; the result is written to the same dataset.
    table_to_pivot - name of the source table.
    ls_pks         - key columns; the output has one row per distinct combination of them.
    pivot_column   - column whose distinct values become the prefix of the new columns.

  Every column of the source table that is neither a key column nor the pivot
  column is treated as a value column. For each (pivot value, value column)
  pair the output gets a column named <pivot value>_<value column>, filled with
  MAX(value column) over the rows of the group that carry that pivot value.

  Notes:
    - Pivot values are compared and ordered as strings, so the pivot column may
      be of any type that can be cast to STRING.
    - NULL pivot values are ignored.
    - Generated column names are backtick-quoted, but a pivot value that cannot
      form a legal column name still makes the final CREATE TABLE fail.
*/
DECLARE sql_pivot STRING;
DECLARE sql_pk_string STRING;
DECLARE sql_val_string STRING;
DECLARE sql_pivot_cols STRING DEFAULT "";
DECLARE pivot_cols_stmt STRING;
DECLARE pivot_ls_values ARRAY<STRING>;
DECLARE ls_pivot_value_columns ARRAY<STRING>;
DECLARE nb_pivot_col_values INT64;
DECLARE nb_pivot_val_values INT64;
DECLARE loop_index INT64 DEFAULT 0;
DECLARE loop2_index INT64 DEFAULT 0;
DECLARE source_table STRING;
DECLARE pivot_col_ref STRING;
DECLARE excluded_columns ARRAY<STRING>;

/* Fail early with a readable message instead of a syntax error in generated SQL. */
IF dataset IS NULL OR table_to_pivot IS NULL OR pivot_column IS NULL
   OR IFNULL(ARRAY_LENGTH(ls_pks), 0) = 0 THEN
  RAISE USING MESSAGE = 'pivot: dataset, table_to_pivot, pivot_column and a non-empty ls_pks are required';
END IF;

/* Identifiers cannot be bound as query parameters, so they are backtick-quoted
   below; a backtick inside one of them would terminate the quoting. */
IF STRPOS(CONCAT(dataset, table_to_pivot, pivot_column, ARRAY_TO_STRING(ls_pks, '')), '`') > 0 THEN
  RAISE USING MESSAGE = 'pivot: identifiers must not contain backticks';
END IF;

SET source_table = CONCAT('`', dataset, '.', table_to_pivot, '`');
SET pivot_col_ref = CONCAT('`', pivot_column, '`');
SET sql_pk_string = CONCAT('`', ARRAY_TO_STRING(ls_pks, '`,`'), '`');

/* Get the distinct values of the pivot column; they drive the new output columns.
   The ORDER BY key must be the same expression as the DISTINCT argument. */
SET pivot_cols_stmt = CONCAT(
  'SELECT ARRAY_AGG(DISTINCT CAST(', pivot_col_ref, ' AS STRING) IGNORE NULLS ',
  'ORDER BY CAST(', pivot_col_ref, ' AS STRING)) AS pivot_ls_values ',
  'FROM ', source_table
);
EXECUTE IMMEDIATE pivot_cols_stmt INTO pivot_ls_values;
/* Derive the count from the array itself so the OFFSET lookups below cannot go out of range. */
SET nb_pivot_col_values = IFNULL(ARRAY_LENGTH(pivot_ls_values), 0);

/* Get the names of the value columns: everything except the keys and the pivot column.
   Column names are case-insensitive in BigQuery, hence the LOWER() on both sides. */
SET excluded_columns = ARRAY(
  SELECT LOWER(col_name)
  FROM UNNEST(ARRAY_CONCAT(ls_pks, [pivot_column])) AS col_name
);
SET sql_val_string = CONCAT(
  'SELECT ARRAY_AGG(column_name ORDER BY ordinal_position) AS ls_pivot_value_columns ',
  'FROM `', dataset, '.INFORMATION_SCHEMA.COLUMNS` ',
  'WHERE table_name = @src_table ',
  'AND LOWER(column_name) NOT IN UNNEST(@excluded)'
);
EXECUTE IMMEDIATE sql_val_string
  INTO ls_pivot_value_columns
  USING table_to_pivot AS src_table, excluded_columns AS excluded;
SET nb_pivot_val_values = IFNULL(ARRAY_LENGTH(ls_pivot_value_columns), 0);

/* Build one ", MAX(IF(...)) AS `value_column`" fragment per (pivot value, value column) pair. */
WHILE loop_index < nb_pivot_col_values DO
  SET loop2_index = 0;
  LOOP
    /* Test before doing any work so that zero value columns is handled cleanly. */
    IF loop2_index >= nb_pivot_val_values THEN
      BREAK;
    END IF;
    SET sql_pivot_cols = CONCAT(
      sql_pivot_cols,
      ', MAX(IF(CAST(', pivot_col_ref, ' AS STRING) = ',
      /* %T renders the value as a correctly escaped GoogleSQL string literal. */
      FORMAT('%T', pivot_ls_values[OFFSET(loop_index)]),
      ', `', ls_pivot_value_columns[OFFSET(loop2_index)], '`, NULL)) AS `',
      /* Backtick and backslash are special inside a quoted identifier. */
      REGEXP_REPLACE(pivot_ls_values[OFFSET(loop_index)], r'[`\\]', '_'),
      '_', ls_pivot_value_columns[OFFSET(loop2_index)], '`'
    );
    SET loop2_index = loop2_index + 1;
  END LOOP;
  SET loop_index = loop_index + 1;
END WHILE;

/* Assemble and run the final statement; each fragment carries its own leading comma. */
SET sql_pivot = CONCAT(
  'CREATE OR REPLACE TABLE `', dataset, '.', table_to_pivot, '_pivot` AS SELECT ',
  sql_pk_string, sql_pivot_cols,
  ' FROM ', source_table,
  ' GROUP BY ', sql_pk_string
);
EXECUTE IMMEDIATE sql_pivot;
END;
奇怪的事情:嵌套的 WHILE 循环在 BQ 中不起作用,只有最后一个 WHILE 循环会被执行。这就是过程代码中混用 WHILE 和 LOOP 的原因。