Week 7
Describing Categorical Variables
/*st107d01.sas*/

/* Start from a clean slate: a bare TITLE statement cancels all current titles. */
title;

/* User-defined format that turns the 0/1 codes of Bonus into readable labels. */
proc format;
    value bonusfmt
        1 = "Bonus Eligible"
        0 = "Not Bonus Eligible";
run;

/* Frequency analysis of STAT1.ameshousing3:
     - one-way tables for Bonus, Fireplaces and Lot_Shape_2
     - two-way crosstabulations of Fireplaces and of Lot_Shape_2 against Bonus
   PLOTS(ONLY)=FREQPLOT(SCALE=PERCENT) requests only frequency plots,
   drawn on a percent scale, for the requested tables. */
proc freq data=STAT1.ameshousing3;
    format Bonus bonusfmt.;   /* show Bonus with its labels */
    tables Bonus Fireplaces Lot_Shape_2
           (Fireplaces Lot_Shape_2)*Bonus
           / plots(only)=freqplot(scale=percent);
run;

/* Distribution of Basement_Area within each Bonus level.
   NOPRINT suppresses the tabular output; the HISTOGRAM statement still
   produces the graph. */
proc univariate data=STAT1.ameshousing3 noprint;
    format Bonus bonusfmt.;   /* show Bonus with its labels */
    class Bonus;              /* one histogram per Bonus category */
    var Basement_Area;
    histogram Basement_Area;
    /* Inset box in the top-left (north-west) corner listing the mean,
       standard deviation, median, minimum and maximum. */
    inset mean std median min max / position=nw format=5.2;
run;
Result
/*st107s01.sas*/ /*Part A*/

/* Tables only for this step: switch ODS graphics off. */
ods graphics off;

/* One-way frequency tables for Unsafe, Type, Region and Size
   in STAT1.safety. */
proc freq data=STAT1.safety;
    title "Safety Data Frequencies";
    tables Unsafe Type Region Size;
run;

/* Switch ODS graphics back on for the steps that follow. */
ods graphics on;
/*st107s01.sas*/ /*Part B*/

/* User-defined format that labels the 0/1 codes of Unsafe. */
proc format;
    value safefmt
        0 = 'Average or Above'
        1 = 'Below Average';
run;

/* Crosstabulation of Region (rows) by Unsafe (columns) in STAT1.safety.
   CHISQ    - chi-square tests of association
   EXPECTED - expected cell frequencies
   RELRISK  - odds ratio and relative risk estimates */
proc freq data=STAT1.safety;
    title "Association between Unsafe and Region";
    format Unsafe safefmt.;   /* show Unsafe with its labels */
    tables Region*Unsafe / chisq expected relrisk;
run;
/*st107s01.sas*/ /*Part C*/

/* Crosstabulation of Size (rows) by Unsafe (columns) in STAT1.safety.
   CHISQ    - chi-square tests of association
   MEASURES - measures of association
   CL       - confidence limits for those measures
   The safefmt. format is expected to have been defined already (Part B). */
proc freq data=STAT1.safety;
    title "Association between Unsafe and Size";
    format Unsafe safefmt.;   /* show Unsafe with its labels */
    tables Size*Unsafe / cl measures chisq;
run;
Result
Tests of Association
/*st107d02.sas*/

/* Tables only for this step: switch ODS graphics off. */
ods graphics off;

/* Two separate two-way tables from STAT1.ameshousing3:
   Lot_Shape_2 by Bonus and Fireplaces by Bonus.
   CHISQ     - chi-square tests of association
   EXPECTED  - expected cell frequencies
   CELLCHI2  - each cell's contribution to the chi-square statistic
   RELRISK   - odds ratio and relative risk estimates
   NOCOL / NOPERCENT - drop column percentages and overall percentages
   The bonusfmt. format is expected to have been defined already. */
proc freq data=STAT1.ameshousing3;
    title 'Associations with Bonus';
    format Bonus bonusfmt.;   /* show Bonus with its labels */
    tables Lot_Shape_2*Bonus Fireplaces*Bonus
           / chisq expected cellchi2 relrisk nocol nopercent;
run;

/* Switch ODS graphics back on for the steps that follow. */
ods graphics on;
Result
/*st107d03.sas*/

/* Tables only for this step: switch ODS graphics off. */
ods graphics off;

/* Crosstabulation of Fireplaces (rows) by Bonus (columns) in
   STAT1.ameshousing3.
   CHISQ    - chi-square tests of association
   MEASURES - measures of association
   CL       - confidence limits for those measures
   The bonusfmt. format is expected to have been defined already. */
proc freq data=STAT1.ameshousing3;
    title 'Ordinal Association between FIREPLACES and BONUS?';
    format Bonus bonusfmt.;   /* show Bonus with its labels */
    tables Fireplaces*Bonus / cl measures chisq;
run;

/* Switch ODS graphics back on for the steps that follow. */
ods graphics on;
Result
Logistic Regression
/*st107d04.sas*/

/* Plots are wanted here, so make sure ODS graphics is on. */
ods graphics on;

/* Simple logistic regression on STAT1.ameshousing3:
   models the probability that Bonus = 1 from Basement_Area.
   ALPHA=0.05 sets the significance level for confidence limits,
   PLOTS(ONLY)=(EFFECT ODDSRATIO) restricts graphics to the effect plot
   and the odds-ratio plot, and CLODDS=PL requests profile-likelihood
   confidence limits for the odds ratios. */
proc logistic data=STAT1.ameshousing3
              plots(only)=(effect oddsratio)
              alpha=0.05;
    title 'LOGISTIC MODEL (1):Bonus=Basement_Area';
    model Bonus(event='1') = Basement_Area / clodds=pl;
run;
Result
/*st107s02.sas*/

/* Plots are wanted here, so make sure ODS graphics is on. */
ods graphics on;

/* Simple logistic regression on STAT1.safety:
   models the probability that Unsafe = 1 from Weight.
   Only the effect plot and the odds-ratio plot are produced;
   CLODDS=PL requests profile-likelihood confidence limits for the
   odds ratios. */
proc logistic data=STAT1.safety
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (1):Unsafe=Weight';
    model Unsafe(event='1') = Weight / clodds=pl;
run;
Result
Logistic Regression with Categorical Predictors
/*st107d05.sas*/

/* Plots are wanted here, so make sure ODS graphics is on. */
ods graphics on;

/* Multiple logistic regression on STAT1.ameshousing3:
   probability that Bonus = 1 as a function of Basement_Area,
   Fireplaces and Lot_Shape_2. */
proc logistic data=STAT1.ameshousing3
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (2):Bonus= Basement_Area Fireplaces Lot_Shape_2';

    /* Reference-cell coding for both classification variables;
       Fireplaces='0' and Lot_Shape_2='Regular' are the baseline levels. */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    /* CLODDS=PL: profile-likelihood confidence limits for the odds ratios. */
    model Bonus(event='1') = Basement_Area Fireplaces Lot_Shape_2
          / clodds=pl;

    /* Report the Basement_Area odds ratio for a 100-unit change. */
    units Basement_Area=100;
run;
Result
/*st107s03.sas*/

/* Plots are wanted here, so make sure ODS graphics is on. */
ods graphics on;

/* Multiple logistic regression on STAT1.safety:
   probability that Unsafe = 1 as a function of Weight, Region and Size. */
proc logistic data=STAT1.safety
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (2):Unsafe=Weight Region Size';

    /* Reference-cell coding for both classification variables;
       Region='Asia' and Size='3' are the baseline levels. */
    class Region(ref='Asia') Size(ref='3') / param=ref;

    /* CLODDS=PL: profile-likelihood confidence limits for the odds ratios. */
    model Unsafe(event='1') = Weight Region Size / clodds=pl;
run;
Result
Stepwise Selection with Interactions
/*st107d06.sas*/ /*Part A*/

/* Backward elimination on STAT1.ameshousing3, starting from all main
   effects of Basement_Area, Fireplaces and Lot_Shape_2 plus every
   two-way interaction among them (the "|" bar notation limited by @2). */
proc logistic data=STAT1.ameshousing3
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (3): Backward Elimination '
          'Bonus=Basement_Area|Fireplaces|Lot_Shape_2';

    /* Reference-cell coding; Fireplaces='0' and Lot_Shape_2='Regular'
       are the baseline levels. */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    /* SLSTAY=0.10: an effect must be significant at the 0.10 level to
       remain in the model. CLODDS=PL: profile-likelihood confidence
       limits for the odds ratios. */
    model Bonus(event='1') = Basement_Area|Fireplaces|Lot_Shape_2 @2
          / selection=backward slstay=0.10 clodds=pl;

    /* Report the Basement_Area odds ratio for a 100-unit change. */
    units Basement_Area=100;
run;
/*st107d06.sas*/ /*Part B*/

/* Fit the model with Basement_Area, Lot_Shape_2, their interaction, and
   Fireplaces on STAT1.ameshousing3, then request customized odds ratios
   for the two variables involved in the interaction.
   PLOTS(ONLY)=ODDSRATIO(RANGE=CLIP) produces only the odds-ratio plot,
   with its axis range clipped. */
proc logistic data=STAT1.ameshousing3
              plots(only)=oddsratio(range=clip);
    title 'LOGISTIC MODEL (3.1): Bonus=Basement_Area|Lot_Shape_2 Fireplaces';

    /* Reference-cell coding; Fireplaces='0' and Lot_Shape_2='Regular'
       are the baseline levels. */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    model Bonus(event='1') = Basement_Area|Lot_Shape_2 Fireplaces;

    /* Report the Basement_Area odds ratio for a 100-unit change. */
    units Basement_Area=100;

    /* Odds ratio for Basement_Area evaluated at every level of Lot_Shape_2,
       with profile-likelihood confidence limits. */
    oddsratio Basement_Area / at (Lot_Shape_2=ALL) cl=pl;

    /* Odds ratio for Lot_Shape_2 evaluated at Basement_Area = 1000 and 1500,
       with profile-likelihood confidence limits. */
    oddsratio Lot_Shape_2 / at (Basement_Area=1000 1500) cl=pl;
run;
Result
/*st107d07.sas*/

/* The fit is only needed for its item store, so silence all ODS output
   while PROC LOGISTIC runs. */
ods select none;

/* Fit Bonus on Basement_Area, Lot_Shape_2, their interaction, and
   Fireplaces, and save the fitted model in the item store ISBONUS. */
proc logistic data=STAT1.ameshousing3;
    /* Reference-cell coding; Fireplaces='0' and Lot_Shape_2='Regular'
       are the baseline levels. */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');
    model Bonus(event='1') = Basement_Area|Lot_Shape_2 Fireplaces;
    units Basement_Area=100;
    store out=isbonus;   /* item store restored by PROC PLM below */
run;

/* Restore normal ODS output. */
ods select all;

/* Five new houses to score. Lot_Shape_2 is declared as $9 up front so
   that 'Irregular' (9 characters) is not truncated by list input. */
data newhouses;
    length Lot_Shape_2 $9;
    input Fireplaces Lot_Shape_2 $ Basement_Area;
    datalines;
0 Regular 1060
2 Regular 775
2 Irregular 1100
1 Irregular 975
1 Regular 800
;
run;

/* Score the new houses with the stored model. ILINK applies the inverse
   link function, so predictions are on the probability scale. */
proc plm restore=isbonus;
    title 'Predictions using PROC PLM';
    score data=newhouses out=scored_houses / ILINK;
run;

/* List the scored observations. */
proc print data=scored_houses;
run;
Result
/*st107s04.sas*/
/* Enable ODS graphics to display output plots */
ods graphics on;

/* Define the sizefmt. format used below. PROC LOGISTIC references it in
   its FORMAT statement and in ref='Small'; without a definition the step
   fails in a fresh session because the format cannot be found.
   NOTE(review): the label 'Small' is required by ref='Small' below; the
   coding 1=Small, 2=Medium, 3=Large is assumed - confirm against the
   STAT1.safety data dictionary. If sizefmt. is already supplied by a
   format library, this definition in WORK takes its place for this session
   under the default FMTSEARCH order. */
proc format;
    value sizefmt
        1 = 'Small'
        2 = 'Medium'
        3 = 'Large';
run;

/* Run logistic regression using PROC LOGISTIC with backward elimination */
proc logistic data=STAT1.safety plots(only)=(effect oddsratio);
    /* Reference-cell coding; Region='Asia' and the formatted Size value
       'Small' are the baseline levels */
    class Region (param=ref ref='Asia')
          Size (param=ref ref='Small');
    /* Start from Weight, Region and Size and remove effects by backward
       elimination; CLODDS=PL requests profile-likelihood confidence limits
       for the odds ratios */
    model Unsafe(event='1') = Weight Region Size
          / clodds=pl selection=backward;
    /* Report the Weight odds ratio for a one-unit DECREASE in Weight */
    units Weight = -1;
    /* Save the fitted model in the item store ISSAFE for PROC PLM */
    store isSafe;
    /* Group and label Size levels by their sizefmt. values */
    format Size sizefmt.;
    /* Set a title for the analysis */
    title 'Logistic Model: Backwards Elimination';
run;

/* Create a new dataset 'checkSafety' with new observations.
   Region is read with column input (columns 5-13) because 'N America'
   contains an embedded blank, so the data lines below must start in
   column 1 - do not indent them. */
data checkSafety;
    length Region $9.;
    input Weight Size Region $ 5-13;
    datalines;
4 1 N America
3 1 Asia
5 3 Asia
5 2 N America
;
run;

/* Use PROC PLM to score new observations with the stored model 'isSafe'.
   ILINK applies the inverse link function, so predictions are on the
   probability scale. */
proc plm restore=isSafe;
    score data=checkSafety out=scored_cars / ILINK;
    title 'Safety Predictions using PROC PLM';
run;

/* Print the scored dataset 'scored_cars' */
proc print data=scored_cars;
run;
Result