SAS 数据分析实例之数据描述、预处理和抽样
生活随笔收集整理的這篇文章主要介紹了《SAS 数据分析实例之数据描述、预处理和抽样》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
1 描述統計
1.1 描述表屬性
/* Describe the attributes of SASHELP.CARS and print the saved details. */

/* Suppress the procedure title in the ODS output. */
ods noproctitle;

/* Restrict the report to these four ODS tables. */
ods select attributes variables enginehost directory;

proc datasets lib=SASHELP;
    /* ORDER=COLLATE is passed to CONTENTS to control variable ordering; */
    /* OUT= saves the contents details to WORK.TableAttributes.          */
    contents data=SASHELP.CARS order=collate
        out=WORK.TableAttributes (label="Contents Details for SASHELP.CARS");
quit;

/* Name the data set explicitly instead of relying on the implicit */
/* most-recently-created data set, so the step still prints the    */
/* right table if other steps are inserted above it.               */
proc print data=WORK.TableAttributes;
run;
1.2 描述數據特征
1.2.1 分析分類變量
/* One-way frequency tables, each with a frequency plot, for the */
/* categorical variables of SASHELP.CARS.                        */
title "分類變量的頻數";

proc freq data=SASHELP.CARS;
    /* MISSING counts missing values as a level of their own. */
    tables Make Model Type Origin DriveTrain
        / missing plots=(freqplot);
run;
1.2.2 分析數值變量
/* Summary statistics for the numeric variables of SASHELP.CARS. */
title "數值變量的描述性統計量";

proc means data=SASHELP.CARS
        n nmiss min mean median max std;
    var MSRP Invoice EngineSize Cylinders Horsepower
        MPG_City MPG_Highway Weight Wheelbase Length;
run;

/* Clear the title before drawing the histograms. */
title;

/* NOPRINT suppresses the tabular output; only the histograms are produced. */
proc univariate data=SASHELP.CARS noprint;
    histogram MSRP Invoice EngineSize Cylinders Horsepower
              MPG_City MPG_Highway Weight Wheelbase Length;
run;
1.3 描述缺失數據
/* Report missing data in SASHELP.CARS: per-variable frequencies first, */
/* then the combinations of missing / non-missing across variables.     */
ods noproctitle;

/* Formats that map every non-missing value to a single label, so each */
/* variable collapses to "missing" versus "non-missing".               */
proc format;
    value _nmissprint
        low-high = "非缺失";
    value $_cmissprint
        " "   = " "
        other = "非缺失";
run;

/* Per-variable counts of missing versus non-missing values. */
proc freq data=SASHELP.CARS;
    title3 "缺失數據頻數";
    title4 h=2 "圖例: .、A、B,其他 = 缺失";
    format MSRP Invoice EngineSize Cylinders Horsepower
           MPG_City MPG_Highway Weight Wheelbase Length _nmissprint.;
    format Make Model Type Origin DriveTrain $_cmissprint.;
    tables Make Model Type Origin DriveTrain
           MSRP Invoice EngineSize Cylinders Horsepower
           MPG_City MPG_Highway Weight Wheelbase Length
           / missing nocum;
run;

/* Cross all variables (formatted) and write the observed combinations */
/* to a temporary data set instead of printing them.                   */
proc freq data=SASHELP.CARS noprint;
    tables Make * Model * Type * Origin * DriveTrain
         * MSRP * Invoice * EngineSize * Cylinders * Horsepower
         * MPG_City * MPG_Highway * Weight * Wheelbase * Length
           / missing out=Work._MissingData_;
    format MSRP Invoice EngineSize Cylinders Horsepower
           MPG_City MPG_Highway Weight Wheelbase Length _nmissprint.;
    format Make Model Type Origin DriveTrain $_cmissprint.;
run;

/* Print the missing-data patterns with their counts and percentages. */
proc print data=Work._MissingData_ noobs label;
    title3 "跨變量的缺失數據模式";
    title4 h=2 "圖例: .、A、B,其他 = 缺失";
    format MSRP Invoice EngineSize Cylinders Horsepower
           MPG_City MPG_Highway Weight Wheelbase Length _nmissprint.;
    format Make Model Type Origin DriveTrain $_cmissprint.;
    label count   = "頻數"
          percent = "百分比";
run;

/* Clear title3 and above, then drop the temporary pattern table. */
title3;

proc delete data=Work._MissingData_;
run;
2 數據預處理
2.1 列出數據
/* List Position by Team from SASHELP.BASEBALL, with Salary totals. */
title1 "列出數據 - SASHELP.BASEBALL";

/* BY-group processing in PROC PRINT needs the data ordered by Team; */
/* sort into a temporary copy so SASHELP.BASEBALL is left untouched. */
proc sort data=SASHELP.BASEBALL out=WORK.SORTTEMP;
    by Team;
run;

proc print data=WORK.SORTTEMP label n;
    var Position;
    by Team;
    sum Salary;
run;

/* Remove the temporary sorted copy and clear the title. */
proc delete data=work.SORTTEMP;
run;

title1;
2.2 過濾數據
/* Keep centre fielders ("CF") whose Salary is known and below 100. */
proc sql noprint;
    create table WORK.filter as
        select *
        from SASHELP.BASEBALL
        /* SAS orders missing numeric values below every number, so a   */
        /* bare "Salary LT 100" would also select rows with no Salary.  */
        /* Exclude them explicitly.                                     */
        where Salary is not missing
          and Salary LT 100
          and Position EQ "CF";
quit;
2.3 排序數據
/* Order the players from highest to lowest Salary into WORK.sortDS. */
/* NOEQUALS: the relative order of tied observations need not be     */
/* preserved.                                                        */
proc sort
        data=SASHELP.BASEBALL
        out=WORK.sortDS
        noequals;
    by descending Salary;
run;
2.4 排名數據
/* Rank players by Salary, largest first, storing the rank in a new */
/* variable rank_Salary alongside the original columns.             */
proc rank
        data=SASHELP.BASEBALL
        out=WORK.Rank
        descending;
    var   Salary;
    ranks rank_Salary;
run;
2.5 轉換數據
/* Add a log-transformed Salary and the reciprocal of CrHits. */
data WORK.transform;
    set SASHELP.BASEBALL;

    /* LOG is only defined for positive arguments. Guarding the call    */
    /* leaves log_Salary missing for missing or non-positive Salary     */
    /* without raising invalid-argument notes or setting _ERROR_.       */
    if Salary > 0 then log_Salary = log(Salary);

    /* Avoid division by zero (and arithmetic on missing values);       */
    /* inv_CrHits stays missing in those cases.                         */
    if not missing(CrHits) and CrHits ne 0 then inv_CrHits = 1 / CrHits;
run;
2.6 標準化數據
/* Standardize Salary, nHits and nAtBat with METHOD=STD.             */
/* OPREFIX keeps the original variables in the output; SPREFIX=      */
/* sets the name prefix of the standardized variables.               */
proc stdize
        data=SASHELP.BASEBALL
        out=WORK.Stdize
        method=std
        nomiss
        oprefix
        sprefix=Standardized_;
    var Salary nHits nAtBat;
run;
2.7 重置碼值
/* Recode two specific Salary values into sentinel codes; every other */
/* value (including missing) is copied through unchanged.             */
data WORK.recodedValues;
    set SASHELP.BASEBALL;

    if Salary = 100 then
        _recodeVar_ = 99999;
    else if Salary = 200 then
        _recodeVar_ = 88888;
    else
        _recodeVar_ = Salary;
run;
2.8 重置碼范圍
/* Recode every Salary in the closed range [-1, 100] to 100; values */
/* outside the range (including missing) are copied unchanged.      */
data WORK.recodedRanges;
    set SASHELP.BASEBALL;

    if -1 <= Salary <= 100 then
        _recodeVar_ = 100;
    else
        _recodeVar_ = Salary;
run;
3 隨機抽樣
3.1 簡單隨機抽樣
3.1.1 無放回不重復抽樣
/* Simple random sample without replacement: draw 30% of the rows of */
/* SASHELP.BASEBALL. SEED= makes the draw reproducible.              */
/* No STRATA statement here: a STRATA statement would turn this into */
/* stratified sampling and would require the input to be sorted by   */
/* the strata variable, which SASHELP.BASEBALL is not.               */
proc surveyselect
        data=SASHELP.BASEBALL
        out=WORK.RandomSample
        method=srs
        samprate=0.3
        seed=2019;
run;
3.1.2 有放回重復抽樣
/* Simple random sample with replacement (METHOD=URS): 30 draws from */
/* SASHELP.BASEBALL. OUTHITS writes one output row per draw, so a    */
/* unit selected more than once appears more than once.              */
/* No STRATA statement here: a STRATA statement would turn this into */
/* stratified sampling and would require the input to be sorted by   */
/* the strata variable, which SASHELP.BASEBALL is not.               */
proc surveyselect
        data=SASHELP.BASEBALL
        out=WORK.RandomSample
        outhits
        method=urs
        sampsize=30
        seed=2019;
run;
3.2 分層隨機抽樣
3.2.1 無放回不重復抽樣
/* Stratified random sample without replacement, stratified by Position. */

/* Sort a working copy by the strata variable before sampling. */
proc sort
        data=SASHELP.BASEBALL
        out=WORK.SORTTempTableSorted;
    by Position;
run;

/* NOTE(review): ALLOC=PROP is usually paired with a total SAMPSIZE=; */
/* confirm it behaves as intended together with SAMPRATE=.            */
proc surveyselect
        data=WORK.SORTTempTableSorted
        out=WORK.RandomSample
        method=srs
        samprate=0.3
        seed=2019;
    strata Position / alloc=prop;
run;
3.2.2 有放回重復抽樣
/* Stratified random sample with replacement, stratified by Position. */

/* Sort a working copy by the strata variable before sampling. */
proc sort
        data=SASHELP.BASEBALL
        out=WORK.SORTTempTableSorted;
    by Position;
run;

/* METHOD=URS samples with replacement; OUTHITS writes one row per   */
/* draw. ALLOC=PROP is specified together with a SAMPSIZE= of 30.    */
proc surveyselect
        data=WORK.SORTTempTableSorted
        out=WORK.RandomSample
        outhits
        method=urs
        sampsize=30
        seed=2019;
    strata Position / alloc=prop;
run;
總結
以上是生活随笔為你收集整理的SAS 数据分析实例之数据描述、预处理和抽样的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 录制回放模式创建测试用例 - Katal
- 下一篇: 污水处理程序西门子Wincc+S7-30