Week 7

Describing Categorical Variables

/*st107d01.sas*/
/* A bare TITLE statement cancels any title lines still in effect */
title;

/* User-defined format bonusfmt: text labels for the 0/1 codes of Bonus */
proc format;
    value bonusfmt
        0 = "Not Bonus Eligible"
        1 = "Bonus Eligible";
run;

/* Frequency analysis of the ameshousing3 data set:
   - one-way tables for Bonus, Fireplaces, and Lot_Shape_2
   - two-way crosstabulations of Fireplaces by Bonus and Lot_Shape_2 by Bonus
   Each requested table also gets a frequency plot. */
/* PLOTS= output is produced only while ODS Graphics is enabled, so turn it on
   explicitly here instead of relying on the session default (the later
   programs in this file do the same before requesting plots). */
ods graphics on;
proc freq data=STAT1.ameshousing3;
    tables Bonus Fireplaces Lot_Shape_2
           Fireplaces*Bonus Lot_Shape_2*Bonus /
           plots(only)=freqplot(scale=percent); /* only frequency plots, scaled as percentages */
    format Bonus bonusfmt.; /* display Bonus with its bonusfmt. labels */
run;

/* Histograms of Basement_Area from ameshousing3, one per level of Bonus,
   each annotated with an inset of summary statistics. */
proc univariate data=STAT1.ameshousing3 noprint; /* NOPRINT suppresses the tabular output; the histogram is still drawn */
    class Bonus;             /* separate analysis for each Bonus category */
    var Basement_Area;       /* analysis variable */
    histogram Basement_Area; /* request the histogram */
    /* Inset (top-left corner) with mean, standard deviation, median, minimum,
       and maximum. FORMAT=8.2 is used instead of 5.2: a field width of 5
       cannot hold two decimals for values of 100 or more, and the
       Basement_Area values used elsewhere in this program run from the
       hundreds into the thousands (e.g. 775 to 1500), so 5.2 would drop or
       shift decimals inconsistently between statistics. */
    inset mean std median min max / format=8.2 position=nw;
    format Bonus bonusfmt.;  /* display Bonus with its bonusfmt. labels */
run;

Result

/*st107s01.sas*/  /*Part A*/
/* Switch ODS graphics off for this step */
ods graphics off;
/* One-way frequency tables for four variables of the safety data set */
proc freq data=STAT1.safety;
    title "Safety Data Frequencies";
    tables Unsafe
           Type
           Region
           Size;
run;
/* Switch ODS graphics back on */
ods graphics on;

/*st107s01.sas*/  /*Part B*/
/* User-defined format safefmt: text labels for the 0/1 codes of Unsafe */
proc format;
    value safefmt
        1 = 'Below Average'
        0 = 'Average or Above';
run;
/* Two-way table of Region by Unsafe from the safety data set, with
   chi-square tests, expected cell frequencies, and relative-risk estimates */
proc freq data=STAT1.safety;
    title "Association between Unsafe and Region";
    format Unsafe safefmt.; /* display Unsafe with its safefmt. labels */
    tables Region*Unsafe / chisq expected relrisk;
run;

/*st107s01.sas*/  /*Part C*/
/* Two-way table of Size by Unsafe from the safety data set, with chi-square
   tests and measures of association including confidence limits */
proc freq data=STAT1.safety;
    title "Association between Unsafe and Size";
    format Unsafe safefmt.; /* display Unsafe with its safefmt. labels */
    tables Size*Unsafe / measures cl chisq;
run;

Result

Tests of Association

/*st107d02.sas*/
/* Switch ODS graphics off for this step */
ods graphics off;
/* Two-way tables of Lot_Shape_2 by Bonus and Fireplaces by Bonus from
   ameshousing3. Requested: chi-square tests, expected frequencies, each
   cell's contribution to the chi-square statistic, and relative-risk
   estimates; column percentages and overall percentages are suppressed. */
proc freq data=STAT1.ameshousing3;
    title 'Associations with Bonus';
    format Bonus bonusfmt.; /* display Bonus with its bonusfmt. labels */
    tables Lot_Shape_2*Bonus
           Fireplaces*Bonus
           / nocol nopercent
             expected cellchi2
             chisq relrisk;
run;
/* Switch ODS graphics back on */
ods graphics on;

Result

/*st107d03.sas*/
/* Switch ODS graphics off for this step */
ods graphics off;
/* Two-way table of Fireplaces by Bonus from ameshousing3, with chi-square
   tests and measures of association including confidence limits */
proc freq data=STAT1.ameshousing3;
    title 'Ordinal Association between FIREPLACES and BONUS?';
    format Bonus bonusfmt.; /* display Bonus with its bonusfmt. labels */
    tables Fireplaces*Bonus / measures cl chisq;
run;
/* Switch ODS graphics back on */
ods graphics on;

Result

Logistic Regression

/*st107d04.sas*/
/* ODS graphics must be on for the requested plots */
ods graphics on;
/* Simple logistic regression on ameshousing3: Bonus modeled on
   Basement_Area, with only the effect and odds-ratio plots produced */
proc logistic data=STAT1.ameshousing3
              plots(only)=(effect oddsratio)
              alpha=0.05;
    title 'LOGISTIC MODEL (1):Bonus=Basement_Area';
    /* Bonus='1' is the modeled event; odds-ratio confidence limits use
       profile likelihood */
    model Bonus(event='1') = Basement_Area / clodds=pl;
run;

Result

/*st107s02.sas*/
/* ODS graphics must be on for the requested plots */
ods graphics on;
/* Simple logistic regression on the safety data set: Unsafe modeled on
   Weight, with only the effect and odds-ratio plots produced */
proc logistic data=STAT1.safety
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (1):Unsafe=Weight';
    /* Unsafe='1' is the modeled event; odds-ratio confidence limits use
       profile likelihood */
    model Unsafe(event='1') = Weight / clodds=pl;
run;

Result

Multiple Logistic Regression

/*st107d05.sas*/

/* ODS graphics must be on for the requested plots */
ods graphics on;

/* Multiple logistic regression on ameshousing3: Bonus modeled on one
   continuous predictor and two classification predictors */
proc logistic data=STAT1.ameshousing3
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (2):Bonus= Basement_Area Fireplaces Lot_Shape_2';

    /* Reference-cell coding for both classification variables, with their
       reference levels named explicitly */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    /* Bonus='1' is the modeled event; odds-ratio confidence limits use
       profile likelihood */
    model Bonus(event='1') = Basement_Area Fireplaces Lot_Shape_2
          / clodds=pl;

    /* Report the Basement_Area odds ratio per 100-unit change */
    units Basement_Area=100;
run;

Result

/*st107s03.sas*/

/* ODS graphics must be on for the requested plots */
ods graphics on;

/* Multiple logistic regression on the safety data set: Unsafe modeled on
   Weight plus the classification predictors Region and Size */
proc logistic data=STAT1.safety
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (2):Unsafe=Weight Region Size';

    /* Reference-cell coding for both classification variables, with their
       reference levels named explicitly */
    class Region(ref='Asia') Size(ref='3') / param=ref;

    /* Unsafe='1' is the modeled event; odds-ratio confidence limits use
       profile likelihood */
    model Unsafe(event='1') = Weight Region Size / clodds=pl;
run;

Result

Stepwise Selection with Interactions

/*st107d06.sas*/  /*Part A*/

/* Logistic regression on ameshousing3 with backward elimination, starting
   from all main effects and two-way interactions */
proc logistic data=STAT1.ameshousing3
              plots(only)=(effect oddsratio);
    title 'LOGISTIC MODEL (3): Backward Elimination '
           'Bonus=Basement_Area|Fireplaces|Lot_Shape_2';

    /* Reference-cell coding for both classification variables, with their
       reference levels named explicitly */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    /* Bar notation capped at @2 generates main effects and two-way
       interactions only; an effect must meet the 0.10 significance level
       to stay in the model */
    model Bonus(event='1') = Basement_Area|Fireplaces|Lot_Shape_2 @2
          / selection=backward slstay=0.10 clodds=pl;

    /* Report the Basement_Area odds ratio per 100-unit change */
    units Basement_Area=100;
run;

/*st107d06.sas*/  /*Part B*/

/* Logistic regression on ameshousing3 with a Basement_Area by Lot_Shape_2
   interaction plus Fireplaces; only the odds-ratio plot is produced, with
   its axis range clipped */
proc logistic data=STAT1.ameshousing3
              plots(only)=oddsratio(range=clip);
    title 'LOGISTIC MODEL (3.1): Bonus=Basement_Area|Lot_Shape_2 Fireplaces';

    /* Reference-cell coding for both classification variables, with their
       reference levels named explicitly */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    /* Bonus='1' is the modeled event */
    model Bonus(event='1') = Basement_Area|Lot_Shape_2 Fireplaces;

    /* Report the Basement_Area odds ratio per 100-unit change */
    units Basement_Area=100;

    /* Odds ratios for Basement_Area at every level of Lot_Shape_2,
       with profile-likelihood confidence limits */
    oddsratio Basement_Area / at (Lot_Shape_2=ALL) cl=pl;

    /* Odds ratios for Lot_Shape_2 at Basement_Area values 1000 and 1500,
       with profile-likelihood confidence limits */
    oddsratio Lot_Shape_2 / at (Basement_Area=1000 1500) cl=pl;
run;

Result

/*st107d07.sas*/

/* Suppress all ODS output while the model is refit */
ods select none;

/* Refit the Basement_Area by Lot_Shape_2 interaction model (plus Fireplaces)
   on ameshousing3 and save it as an item store for later scoring */
proc logistic data=STAT1.ameshousing3;
    /* Reference-cell coding for both classification variables, with their
       reference levels named explicitly */
    class Fireplaces  (param=ref ref='0')
          Lot_Shape_2 (param=ref ref='Regular');

    /* Bonus='1' is the modeled event */
    model Bonus(event='1') = Basement_Area|Lot_Shape_2 Fireplaces;

    /* Report the Basement_Area odds ratio per 100-unit change */
    units Basement_Area=100;

    /* Write the fitted model to the item store isbonus (restored later by
       PROC PLM) */
    store isbonus;
run;

/* Restore normal ODS output selection */
ods select all;

/* Build the data set 'newhouses': five new observations to be scored.
   Each data line is read with list input in the order
   Fireplaces, Lot_Shape_2 (character), Basement_Area. */
data newhouses;
    /* Declared before INPUT so the character variable can hold 9 bytes
       (long enough for 'Irregular') */
    length Lot_Shape_2 $9;
    input Fireplaces Lot_Shape_2 $ Basement_Area;
    datalines;
    0  Regular    1060
    2  Regular     775
    2  Irregular  1100
    1  Irregular   975
    1  Regular     800
    ;
run;

/* Restore the item store isbonus and score the observations in newhouses,
   writing the result to scored_houses. ILINK requests predictions on the
   inverse-link (probability) scale. */
proc plm restore=isbonus;
    title 'Predictions using PROC PLM';
    score data=newhouses
          out=scored_houses / ilink;
run;

/* List the scored observations */
proc print data=scored_houses;
run;

Result

/*st107s04.sas*/

/* ODS graphics must be on for the requested plots */
ods graphics on;

/* Logistic regression on the safety data set with backward elimination;
   the fitted model is saved as an item store for later scoring */
proc logistic data=STAT1.safety
              plots(only)=(effect oddsratio);
    title 'Logistic Model: Backwards Elimination';

    /* NOTE(review): sizefmt. is not defined anywhere in the visible part of
       this file - presumably it is created by a setup program; confirm it is
       available before this step runs. The ref='Small' level below is
       matched against the formatted value of Size. */
    format Size sizefmt.;

    /* Reference-cell coding for both classification variables, with their
       reference levels named explicitly */
    class Region(ref='Asia') Size(ref='Small') / param=ref;

    /* Unsafe='1' is the modeled event; odds-ratio confidence limits use
       profile likelihood */
    model Unsafe(event='1') = Weight Region Size
          / selection=backward clodds=pl;

    /* Report the Weight odds ratio for a one-unit decrease */
    units Weight = -1;

    /* Write the fitted model to the item store isSafe (restored later by
       PROC PLM) */
    store isSafe;
run;

/* Build the data set 'checkSafety': four new observations to be scored.
   Weight and Size are read with list input; Region is read with column
   input from columns 5-13 so that values containing a blank (such as
   'N America') are kept whole. Because of the fixed columns, the data
   lines below must start in column 1 and must not be re-indented. */
data checkSafety;
    /* Declared before INPUT so Region can hold 9 bytes ('N America') */
    length Region $9.;
    input Weight Size Region $ 5-13;
    datalines;
4 1 N America
3 1 Asia     
5 3 Asia     
5 2 N America
    ;
run;

/* Restore the item store isSafe and score the observations in checkSafety,
   writing the result to scored_cars. ILINK requests predictions on the
   inverse-link (probability) scale. */
proc plm restore=isSafe;
    title 'Safety Predictions using PROC PLM';
    score data=checkSafety
          out=scored_cars / ilink;
run;

/* List the scored observations */
proc print data=scored_cars;
run;

Result