/* Associate the fileref MYFILE with the raw garden data file. */
filename myfile "E:\data\chap1\garden0.dat";

/*
  Build GARDEN from the raw file and derive several new variables.
  Every comment is written as a delimited block comment: an asterisk-style
  comment without a closing semicolon swallows the statement that follows it.
  The RUN statement on the next line of the file closes this step.
*/
data garden;
    infile myfile;
    /* TOMATO is read as numeric because it is used in arithmetic below. */
    input name $ tomato zucchini peas grapes;
    zone = 14;                                   /* new variable with a constant value */
    type = "home";                               /* new character variable */
    zucchini = zucchini * 10;                    /* overwrite an existing variable */
    total = tomato + zucchini + peas + grapes;   /* sum of the four crops */
    periom = tomato / total;                     /* new variable computed from existing ones */
run;   /* closes the preceding DATA step */

/* GARDEN1: keep only NAME and TOTAL from GARDEN. */
data garden1;
    set garden;
    keep name total;
run;

/* GARDEN2: copy GARDEN without ZONE and TYPE. */
data garden2;
    set garden;
    drop zone type;
run;

/*
  GARDEN3: built from GARDEN2 (GARDEN2 itself is not modified).
  REPLACE=YES allows an existing GARDEN3 to be overwritten.
  DROP and RENAME are two separate statements, each needing its own semicolon.
  The RUN statement on the next line of the file closes this step.
*/
data garden3(replace=yes);
    set garden2;
    drop name;
    rename total=all;   /* TOTAL is stored as ALL in the output data set */
run;   /* closes the preceding DATA step */

/*
  GARDEN4, version 1: add LOGTOMATO, FIRSTCHAR and a two-level GROUPGRAPES.
  The next statement after this block in the file closes the step.
*/
data garden4(replace=yes);
    set garden;
    logtomato = log(tomato);           /* natural log of TOMATO */
    firstchar = substr(name, 1, 2);    /* first two characters of NAME */
    /* Without an explicit length the first assignment ("low") would fix the
       length at 3 and "high" would be stored truncated as "hig". */
    length groupgrapes $ 4;
    if grapes < 50 then groupgrapes = "low";
    else groupgrapes = "high";
run;   /* closes the preceding DATA step */

/*
  GARDEN4, version 2: same derived variables, but GROUPGRAPES has three levels.
  This overwrites the GARDEN4 produced by any earlier step.
  The next statement after this block in the file closes the step.
*/
data garden4(replace=yes);
    set garden;
    logtomato = log(tomato);           /* natural log of TOMATO */
    firstchar = substr(name, 1, 2);    /* first two characters of NAME */
    /* Declare the length so that "high" is not truncated to the length of "low". */
    length groupgrapes $ 4;
    if grapes < 50 then groupgrapes = "low";
    else if grapes >= 50 and grapes < 100 then groupgrapes = "med";
    else groupgrapes = "high";
run;   /* closes the preceding DATA step */

/*
  GARDEN4, version 3: DO-END groups let one IF/ELSE branch set several variables.
  Each DO needs its own END; the END of the first group must come before ELSE.
*/
data garden4(replace=yes);
    set garden;
    logtomato = log(tomato);           /* natural log of TOMATO */
    firstchar = substr(name, 1, 2);    /* first two characters of NAME */
    /* Declare the length so that "high" is not truncated to the length of "low". */
    length groupgrapes $ 4;
    if grapes < 50 then do;
        groupgrapes = "low";
        zone = 16;
    end;
    else do;
        groupgrapes = "high";
        zone = 12;
    end;
run;
/* ##1.5變量的運算 ###邏輯運算 EQ NE GT LT GE LE IN ###數值或文本型變量: 取整(INT) 對數(LOG) 極值(MIN,MAX) 均值(MEAN) 求和(SUM) 計數(N,NMISS) 四舍五入(ROUND) ###日期型變量 -year(date) -month(date) -day(date) -weekday(date) -mdy(m,d,yr):生成yr年m月d日的sas日期值 ###字符型變量 lowcase(s),upcase(s):改變大小寫 substr(s,p,n):從字符串s中第p個字符開始抽取n個字符 repeat(s,n):將s重復n次 index(s,s1):查找s1在s中的位置 length(s):返回s的長度 tranwrd(s,s1,s2):把字符串s中所有的s1替換成s2后的結果 */
/* ##1.6基于行的運算操作 -排序 sort+by -提取部分觀測 –指定條件提取 if where –隨機提取 surveyselect -添加新觀測 append+base,data -刪除已有觀測:delete語句 -修改已有觀測 */
/*
  Sorting example: first read the marine data.
  The quoted path is a placeholder in these notes (the text means
  "folder path"); it has to be replaced with the real directory.
*/
filename myfile "文件夾路徑";

data marine;
    infile myfile(lengths.dat);          /* member lengths.dat inside the MYFILE directory */
    input name $ family $ length @@;     /* trailing @@: several observations per input line */
run;
/*
  Sort MARINE into SEASORT. Without OUT= the input data set would be
  overwritten. NODUPKEY drops observations whose BY values duplicate an
  earlier observation.
*/
proc sort data=marine out=seasort nodupkey;
    by family descending length;   /* FAMILY ascending, then LENGTH descending */
run;

/* Print the sorted data; the RUN on the next line of the file closes this step. */
proc print data=seasort;
    title "print data=seasort";
run;   /* closes the preceding PROC PRINT step */

/* Select observations that meet a condition, using a subsetting IF. */
data seasort_whalel;
    set seasort;
    if Family = "whale";
run;

/*
  Alternative: the same selection with a WHERE statement.
  The RUN on the next line of the file closes this step.
*/
data seasort_whalel;
    set seasort;
    where Family = "whale";
run;   /* closes the preceding DATA step */

/* WHERE combined with BY inside PROC SORT: filter and sort in one step. */
proc sort data=marine out=seasort_whale nodupkey;
    where Family = "whale";
    by descending length;   /* LENGTH descending only; FAMILY is constant after the WHERE */
run;

/* Print the result; the RUN on the next line of the file closes this step. */
proc print data=seasort_whale;
    title "whales only (sorted)";
run;   /* closes the preceding PROC PRINT step */

/*
  Random sampling: draw 5 observations from MARINE by simple random sampling
  into SAMPLED_MARINE. Adding SEED= (for example seed=1) makes repeated runs
  return the same sample.
*/
proc surveyselect data=marine method=srs n=5 out=sampled_marine;
run;

/* Print the sample; the RUN on the next line of the file closes this step. */
proc print data=sampled_marine;
run;   /* closes the preceding PROC PRINT step */

/* Add new observations: put the new record(s) in TEMP first. */
data temp(replace=yes);
    input name $ family $ length;
    datalines;
aaa shark 60
;
run;

/* Append the observations of TEMP to the end of MARINE. */
proc append base=marine data=temp;
run;
/* Show MARINE after the append. */
proc print data=marine;
run;

/* Delete observations: remove rows whose FAMILY is missing. */
data marine;
    set marine;
    if family = "" then delete;
run;

/* Delete the 2nd and 3rd observations read (_N_ counts DATA step iterations). */
data marine;
    set marine;
    if _n_ = 2 or _n_ = 3 then delete;
run;

/*
  Recode: replace a missing FAMILY with "unknown".
  The RUN on the next line of the file closes this step.
*/
data marine;
    set marine;
    if family = "" then family = "unknown";
run;   /* closes the preceding DATA step */

/*
  Transpose (numeric variables only, per the original notes).
  General form, optional parts in square brackets:
      PROC TRANSPOSE [DATA=input-data-set OUT=transposed-data-set] [options];
          VAR  variable-list;
          ID   variable;
          COPY variable-list;
      RUN;
*/

/* The library path is left empty in the notes and must be filled in. */
libname chap1 "";

/*
  Transpose chap1.A into chap1.TA. NAME=course names the output variable
  that holds the names of the transposed variables.
*/
proc transpose data=chap1.A out=chap1.TA name=course;
    var statistics chinese;   /* variables to transpose */
    id name;                  /* values of NAME become the new variable names */
run;
/* ##1.7多數據集操作 –復制數據集 data+set –拆分數據集 data+output –合并數據集 */
/* Copy a data set: MARINE0 is a copy of MARINE. */
data marine0;
    set marine;
run;

/*
  Split a data set: route each observation to WHALE or SHARK by FAMILY.
  OUTPUT takes the target data set name directly; writing "output=shark"
  would instead assign to a variable called OUTPUT.
  The RUN on the next line of the file closes this step.
*/
data whale shark(replace=yes);
    set marine0;
    if family = "whale" then output whale;
    else if family = "shark" then output shark;
run;   /* closes the preceding DATA step */

/*
  Vertical concatenation of data sets. General form:
      SET data-set-1 data-set-2 ... data-set-n;
*/

/* The library path is left empty in the notes and must be filled in. */
libname chap1 "";

/* Stack chap1.A and chap1.B. */
data chap1.AB;
    set chap1.A chap1.B;
run;

/* Stack chap1.A and chap1.C; the RUN on the next line of the file closes this step. */
data chap1.AC;
    set chap1.A chap1.C;
run;   /* closes the preceding DATA step */

/* Horizontal merge of data sets. */

/* The library path is left empty in the notes and must be filled in. */
libname chap1 "";

/* Both inputs must be sorted by the key variable NAME (ascending) before the merge. */
proc sort data=chap1.C out=chap1.C;
    by name;
run;

proc sort data=chap1.D out=chap1.D;
    by name;
run;

/*
  Match-merge chap1.C and chap1.D side by side.
  The RUN on the next line of the file closes this step.
*/
data chap1.CD;
    merge chap1.C chap1.D;
    by name;   /* the key variable is NAME */
run;   /* closes the preceding DATA step */

/*
  DATA _NULL_: no data set is created; the PUT statement writes its
  output to the log only.
*/
data _NULL_;
    x = exp(5);
    y = log(10);
    put x= y=;
run;

/* Compute 1-(1-x)**50 for x=0.1 and show it rounded to 2 and to 4 decimals. */
data _NULL_;
    x = 0.1;
    y = 1 - (1 - x)**50;
    y2 = round(y, 0.01);
    y4 = round(y, 0.0001);
    put y2= y4=;
run;
/* ###1.8循環語句 1.DO循環 格式: DO 計數變量=起始值 TO 結束值 BY 步長; 循環體…; END; 在循環體中可以用LEAVE語句跳出循環,使用CONTINUE語句結束本輪循環,調整計數變量進入下一輪循環。 */
/*
  Method 1: iterative DO loop with LEAVE.
  x runs from 5 to 30 in steps of 5 and y is the natural log of x.
  LEAVE exits the loop the first time y exceeds 3, so no later x is written.
*/
data chap2.example2_1;
    do x = 5 to 30 by 5;
        y = log(x);
        if y > 3 then leave;   /* exit the whole loop */
        output;
        format y 8.5;          /* display y with width 8 and 5 decimals */
    end;
run;

/*
  Method 2: the same loop with CONTINUE.
  CONTINUE skips only the rest of the current iteration (here the OUTPUT)
  and moves on to the next x; it does not exit the loop.
  This step writes to the same data set name as method 1 and replaces it.
*/
data chap2.example2_1;
    do x = 5 to 30 by 5;
        y = log(x);
        if y > 3 then continue;   /* skip this iteration */
        output;
        format y 8.5;             /* display y with width 8 and 5 decimals */
    end;
run;
/* 2.DO WHILE循環 格式(DO WHILE循環的語法結構): DO WHILE(循環繼續條件); 循環體語句; END; 開始執行時,會先判斷循環條件表達式的邏輯結果是否為真,若為真,則繼續執行循環體語句,若為假則循環結束;循環體每執行一次,檢驗循環條件一次 */
/*
  Iterative DO combined with a WHILE clause.
  y starts at 100. For x = 2, 4, ..., 10 the loop body runs only while
  y is still below 3000 when the iteration begins; each pass multiplies
  y by the current x and writes one observation.
*/
data chap2.example2_2;
    y = 100;
    do x = 2 to 10 by 2 while (y < 3000);
        y = x * y;
        output;
    end;
run;
/* 3.DO UNTIL 循環 格式: DO UNTIL (循環退出條件); 循環體語句…; END; 語句開始執行后,循環體每執行一次都要判斷是否滿足循環退出條件,若循環退出條件表達式的邏輯結果為真,則退出循環,否則重復執行循環體語句 */
/*
  Iterative DO combined with an UNTIL clause.
  y starts at 100. For x = 2, 4, ..., 10 each pass multiplies y by the
  current x and writes one observation; the loop stops after the first
  pass that leaves y at 3000 or more.
*/
data chap2.example2_2;
    y = 100;
    do x = 2 to 10 by 2 until (y >= 3000);
        y = x * y;
        output;
    end;
run;
/* 上述三種循環都在DATA步完成,DO循環不設限定條件,DO WHILE和DO UNTIL循環都設置了循環條件;DO WHILE 的條件在循環體開頭檢驗,DO UNTIL的條件在循環體結束處檢驗 */
/* ##1.9條件分支結構 if-else in select */
/*
  IF-ELSE chain: assign AGEGROUP from AGE.

  Notes carried over from the original text:
    - When writing an IF chain, put the condition most likely to be true first.
    - A long OR chain such as
          if quiz="A+" or quiz="A" or quiz="B+" or quiz="B" then quizrange=1;
          else if ...
      can be written more compactly with IN:
          if quiz in ("A+" "A" "B+" "B") then quizrange=1;

  The notes contain no sample records for this step. Insert the data lines
  between DATALINES and the lone semicolon that ends the data.
*/
data conditional;
    length gender $1 quiz $2;
    input age gender midterm quiz finalexam;
    if missing(age) then agegroup = .;      /* missing age gives a missing group */
    else if age lt 20 then agegroup = 1;
    else if age lt 40 then agegroup = 2;
    else if age lt 60 then agegroup = 3;
    else if age ge 60 then agegroup = 4;
    datalines;
;
run;
/*
  SELECT group: the same AGEGROUP assignment as an IF-ELSE chain.
  WHEN clauses are tested in order and the first true one is executed;
  the empty OTHERWISE means nothing is done when none match.
  The data records follow the DATALINES statement.
*/
data conditional;
    length gender $1 quiz $2;
    input age gender midterm quiz finalexam;
    select;
        when (missing(age)) agegroup = .;
        when (age lt 20) agegroup = 1;
        when (age lt 40) agegroup = 2;
        when (age lt 60) agegroup = 3;
        when (age ge 60) agegroup = 4;
        otherwise;
    end;
    datalines;
......