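/*
Descriptive statistics in this file are organized around three procedures:
  means      - simple summary statistics
  univariate - more detailed statistics, which can also be used to test
               the distribution of a variable
  freq       - frequency counts, mainly for categorical (attribute) variables
Plotting is covered as well:
  gchart     - bar, block, pie, star and donut charts
  gplot      - scatter and line plots
*/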
/* Summary statistics of wage, reported separately for each level of sex.
   The keywords on the PROC statement select which statistics are printed. */
proc means data=sasuser.wage
           n nmiss median p1 p5 p99 q1 q3 max min
           std stderr var skewness kurtosis t probt qrange;
  class sex;   /* sex is the classification variable */
  var wage;    /* wage is the analysis variable */
run;
/* Sort sasuser.wage by race in ascending order (the data set is replaced
   in place); steps that use BY race afterwards rely on this ordering. */
proc sort data=sasuser.wage;
  by race;
run;
/* Default PROC MEANS statistics for wage, one set of results per race value. */
proc means data=sasuser.wage;
  by race;    /* BY-group variable: the data must already be sorted by race */
  var wage;
run;
/* Build the data set used by the PROC FREQ examples.
   Each observation is one cell of a 10 x 4 table:
     x = row index (1-10), y = column index (1-4), w = weight of the cell.
   The trailing @@ holds the input line so that several (x, y, w) triples
   are read from each data line.
   Fix: the second triple on every line had its x and y digits fused
   (e.g. "12" instead of "1 2", "0.833310" instead of "0.8333 10"), which
   shifted every following value into the wrong variable. */
data aa;
  input x y w @@;
cards;
1 1 0.0495 1 2 0.3168 1 3 0.2723 1 4 0.3614
2 1 0.1451 2 2 0.4038 2 3 0.2050 2 4 0.2461
3 1 0.2403 3 2 0.4264 3 3 0.1628 3 4 0.1705
4 1 0.3878 4 2 0.3673 4 3 0.1224 4 4 0.1224
5 1 0.4063 5 2 0.2813 5 3 0.1563 5 4 0.1563
6 1 0.3333 6 2 0.4167 6 3 0.0833 6 4 0.1667
7 1 0.5000 7 2 0.1667 7 3 0.3333 7 4 0.0000
8 1 0.5000 8 2 0.5000 8 3 0.0000 8 4 0.0000
9 1 0.2500 9 2 0.2500 9 3 0.5000 9 4 0.0000
10 1 0.8333 10 2 0.1667 10 3 0 10 4 0
;
run;
/* One-way frequency tables for wage and for sex.
   Frequency analysis is best suited to categorical (attribute) data. */
proc freq data=sasuser.wage;
  tables wage sex;
run;
/* Two-way table of x by y from data set aa, with chi-square tests of
   association. When cell frequencies are small, Fisher's exact test is
   the better choice. */
proc freq data=aa;
  weight w;             /* w is the weight variable for each (x, y) cell */
  tables x*y / chisq;
run;
/* Contingency-table analysis of x by y from data set aa; the MEASURES
   option requests measures of association. */
proc freq data=aa;
  weight w;                /* w is the weight variable for each (x, y) cell */
  tables x*y / measures;
run;
/* Descriptive statistics for wage, computed within each level of region,
   together with a histogram of wage. */
proc univariate data=sasuser.wage;
  class region;
  var wage;
  histogram wage;
run;
/* Test whether variable normal1 in data set sasuser.normal follows a
   normal distribution; the NORMAL option on the PROC statement requests
   the normality tests. */
proc univariate data=sasuser.normal normal;
  var normal1;
run;
/* Vertical bar chart of wage. SUBGROUP= subdivides each bar by sex
   (within-bar grouping); GROUP= would instead place the groups side by side. */
proc gchart data=sasuser.wage;
  vbar wage / subgroup=sex;
run;
/* Horizontal bar chart of wage, each bar subdivided by sex. */
proc gchart data=sasuser.wage;
  hbar wage / subgroup=sex;
run;
/* Three-dimensional block chart of wage, each block subdivided by sex. */
proc gchart data=sasuser.wage;
  block wage / subgroup=sex;
run;
/* Block chart of wage with the blocks arranged in separate groups by sex
   (GROUP= gives between-group separation rather than subdividing blocks). */
proc gchart data=sasuser.wage;
  block wage / group=sex;
run;
/* Three-dimensional pie chart of sex. */
proc gchart data=sasuser.wage;
  pie3d sex;
run;
/* Star chart of wage. */
proc gchart data=sasuser.wage;
  star wage;
run;
/* Donut chart of wage. */
proc gchart data=sasuser.wage;
  donut wage;
run;
/* Create data set sasuser.xy for the PROC GPLOT examples.
   One observation per data line: year, y, x2 through x6, and a 0/1
   group indicator. */
data sasuser.xy;
  input year y x2 x3 x4 x5 x6 group;
datalines;
1960 27.8 397.5 42.2 50.7 78.3 65.8 0
1961 29.9 413.3 38.1 52 79.2 66.9 1
1962 29.8 439.2 40.3 54 79.2 67.8 0
1963 30.8 459.7 39.5 55.3 79.2 69.6 1
1964 31.2 492.9 37.3 54.7 77.4 68.7 0
1965 33.3 528.6 38.1 63.7 80.2 73.6 1
1966 35.6 560.3 39.3 69.8 80.4 76.3 0
1967 36.4 624.6 37.8 65.9 83.9 77.2 1
1968 36.7 666.4 38.4 64.5 85.5 78.1 0
1969 38.4 717.8 40.1 70 93.7 84.7 1
1970 40.4 768.2 38.6 73.2 106.1 93.3 0
1971 40.3 843.3 39.8 67.8 104.8 89.7 1
1972 41.8 911.6 39.7 79.1 114 100.7 0
1973 40.4 931.1 52.1 95.4 124.1 113.5 1
1974 40.7 1021.5 48.9 94.2 127.6 115.3 0
1975 40.1 1165.9 58.3 123.5 142.9 136.7 1
1976 42.7 1349.6 57.9 129.9 143.6 139.2 0
1977 44.1 1449.4 56.5 117.6 139.2 132 1
1978 46.7 1575.5 63.7 130.9 165.5 132.1 0
1979 50.6 1759.1 61.6 129.8 203.3 154.4 1
1980 50.1 1994.2 58.9 128 219.6 174.9 1
1981 51.7 2258.1 66.4 141 221.6 180.8 0
1982 52.9 2478.7 70.4 168.2 232.6 189.4 1
;
run;
/* Line plot of y (vertical axis) against x2 (horizontal axis).
   The SYMBOL statement draws star markers in red, joins the points with
   a blue line, and labels each point. */
proc gplot data=sasuser.xy;
  plot y*x2;
  symbol1 value=star cv=red ci=blue interpol=join pointlabel;
run;
/* Two line plots in one graph: x2 (vertical axis) against y (horizontal
   axis), one series per value of group. SYMBOL1 styles the first group
   and SYMBOL2 the second.
   Fix: "c=greenv=dot" was missing the space between the colour and the
   V= option, which made the SYMBOL1 statement invalid.
   I=JOIN is now stated on SYMBOL1 as well, so both series are drawn as
   lines without relying on settings left over from an earlier SYMBOL
   statement (SYMBOL options persist until they are reset). */
proc gplot data=sasuser.xy;
  plot x2*y=group;   /* group is the grouping variable */
  symbol1 c=green v=dot cv=yellow i=join;
  symbol2 c=blue v=star cv=orange i=join;
run;
quit;