根据您对上一个答案的评论,top_2_total 看起来是最大的 2 个 total 值之和,因此需要增加几个额外的步骤。我使用 PROC TRANSPOSE 和 DATA 步来实现上一个答案中已有的功能,再用 PROC SUMMARY 取出最大的 2 个 total 值,并把该结果数据集合并回去以生成最终结果。如果有帮助,请告诉我。
/* Build a 3-firm demo data set with four uniform(0,1) measures per firm. */
data have;
  /* Fix the seed so the demo data, and every result derived from it, is
     reproducible from run to run. Without CALL STREAMINIT the RAND stream
     is seeded from the system clock. */
  call streaminit(20140405);
  do firm = 1 to 3;
    revenue = rand("uniform");
    costs   = rand("uniform");
    profits = rand("uniform");
    vcost   = rand("uniform");
    output;   /* one observation per firm */
  end;
run;
/* Pivot HAVE: one row per measure (_NAME_), one column per firm
   (top_1, top_2, top_3). */
proc transpose data=have
               out=want
               prefix=top_;
  var revenue--vcost;
run;

/* Rank the firm values within each measure and add the row total. */
data want;
  set want;
  /* The array lists the columns in reverse (top_3, top_2, top_1), so the
     ascending sort done by CALL SORTN leaves the largest value in top_1
     and the smallest in top_3. */
  array ranked(*) top_3-top_1;
  call sortn(of ranked(*));
  total = sum(of ranked(*));
run;
/* Collect the two largest TOTAL values of WANT into a one-row data set
   with columns total_1 and total_2. */
proc summary data=want nway;
  output out=total_top_2_rec(drop=_:)
         idgroup(max(total) out[2](total)=);
run;

data want;
  /* Read the single summary row on the first iteration only; variables read
     by SET are retained, so total_1/total_2 stay available for every row
     of WANT read below. */
  if _n_ = 1 then set total_top_2_rec;
  top_2_total = sum(total_1, total_2);

  set want;

  /* A comparison evaluates to 1 (true) or 0 (false). */
  Flag90 = (sum(top_1, top_2) > 0.9 * top_2_total);
  Flag60 = (top_1 > top_2_total * 0.6);

  drop total_1 total_2;
run;

proc print data=want;
run;
编辑:我在 PROC TRANSPOSE 之前增加了一段逻辑,您可以在其中指定参与计算的变量,其余部分由代码自动完成,运行代码的人无需再做任何手动修改。变量应以空格分隔的列表形式输入。
/* Import the comma-separated file into HAVE. Reading starts at the second
   record (presumably to skip a header row - confirm against the file). */
data have;
  infile 'C:\dataset (1).csv'
         delimiter=','
         dsd
         missover
         firstobs=2;
  input firm
        v1
        v2
        v3;
run;
/* Space-separated list of the variables that take part in the calculation.
   Edit this list to add or remove variables. */
%let variable_to_consider = v1 v2 v3;
/* Collapse repeated blanks so the list is a clean single-spaced string. */
%let variable_to_consider = %cmpres(&variable_to_consider);

/* Store the number of observations in HAVE in macro variable OBS_COUNT. */
proc sql noprint;
  select count(*)
    into :obs_count
    from have;
quit;

/* Re-assigning through %LET strips the leading blanks that INTO leaves
   in the value. */
%let obs_count = &obs_count;
/* Pivot HAVE: one row per variable listed in &variable_to_consider (_NAME_),
   one column per observation (top_1 ... top_&obs_count). */
proc transpose data=have out=want prefix=top_;
  var &variable_to_consider;
run;

/* Rank the values within each row and add the row total.
   The dead assignment x=dim(top) and the unused END= flag of the earlier
   version are removed: neither ever reached the output data set. */
data want;
  set want;
  /* The array lists the columns in reverse (top_N ... top_1), so the
     ascending sort done by CALL SORTN leaves the largest value in top_1
     and the second largest in top_2. */
  array top(*) top_&obs_count.-top_1;
  call sortn(of top[*]);
  total = sum(of top[*]);
  keep _name_ top_1 top_2 total;
run;
/* Getting the maximum 2 total values using PROC SUMMARY*/
proc summary data=want nway;
output out=total_top_2_rec(drop=_:) idgroup(max(total) out[2](total)=);
run;
data want;
/* Loop to get the values from previous step and generate TOP_2_TOTAL variable */
if _n_=1 then set total_top_2_rec;
top_2_total=sum(total_1,total_2);
set want;
if sum(top_1,top_2) > 0.9 * top_2_total then Flag90=1; else Flag90=0;
if top_1 > top_2_total * 0.6 then Flag60=1; else Flag60=0;
drop total_1 total_2;
run;
proc print data=want;run;
编辑(2014-04-05):如前所述,我已更新逻辑并修复了该问题。以下是更新后的代码。
/* Build a 3-firm demo data set with four uniform(0,1) measures per firm. */
data have1;
  /* Fix the seed so the demo data, and every result derived from it, is
     reproducible from run to run. Without CALL STREAMINIT the RAND stream
     is seeded from the system clock. */
  call streaminit(20140405);
  do firm = 1 to 3;
    revenue = rand("uniform");
    costs   = rand("uniform");
    profits = rand("uniform");
    vcost   = rand("uniform");
    output;   /* one observation per firm */
  end;
run;
/* Import the comma-separated file into HAVE2. Reading starts at the second
   record (presumably to skip a header row - confirm against the file). */
data have2;
  infile 'dataset (1).csv'
         delimiter=','
         dsd
         missover
         firstobs=2;
  input firm
        v1
        v2
        v3;
run;
/*
 * %mymacro - rank the values of each chosen variable across observations
 *            and flag how concentrated the total is in the largest values.
 *
 * Parameters:
 *   input_dataset         data set to analyse
 *   output_dataset        data set to create, one row per analysed variable
 *   variable_to_consider  space-separated list of numeric variables to analyse
 *
 * Columns of the output data set:
 *   _name_        name of the analysed variable
 *   top_1, top_2  largest and second-largest value of that variable
 *   total         sum of the variable over all observations
 *   top_2_total   sum of top_1 and top_2
 *   Flag90        1 when top_2_total is greater than 90 percent of total, else 0
 *   Flag60        1 when top_1 is greater than 60 percent of total, else 0
 */
%macro mymacro(input_dataset= ,output_dataset=, variable_to_consider=);
  /* Keep the row counter local. Without this, INTO :obs_count updates an
     existing global macro variable of the same name. */
  %local obs_count;

  /* Collapse repeated blanks in the variable list. */
  %let variable_to_consider=%cmpres(&variable_to_consider);

  /* Number of observations = number of top_N columns after the transpose. */
  proc sql noprint;
    select count(*) into :obs_count from &input_dataset;
  quit;
  /* Re-assigning through %LET strips the leading blanks left by INTO. */
  %let obs_count=&obs_count;

  /* One row per analysed variable, one column per observation. */
  proc transpose data=&input_dataset out=&output_dataset prefix=top_;
    var &variable_to_consider;
  run;

  data &output_dataset;
    set &output_dataset;
    /* The array lists the columns in reverse (top_N ... top_1), so the
       ascending sort done by CALL SORTN leaves the largest value in top_1
       and the second largest in top_2. */
    array top(*) top_&obs_count.-top_1;
    call sortn(of top[*]);
    total=sum(of top[*]);
    top_2_total=sum(top_1, top_2);
    if top_2_total > 0.9 * total then Flag90=1; else Flag90=0;
    if top_1 > total * 0.6 then Flag60=1; else Flag60=0;
    keep _name_ top_1 top_2 total top_2_total Flag60 Flag90;
  run;
%mend mymacro;
/* Run the analysis on the simulated data set, then on the CSV import. */
%mymacro(
  input_dataset=have1,
  output_dataset=want1,
  variable_to_consider=revenue costs profits vcost
)
%mymacro(
  input_dataset=have2,
  output_dataset=want2,
  variable_to_consider=v1 v2 v3
)

/* List both result data sets. */
proc print data=want1;
run;
proc print data=want2;
run;