This project will be using the same dataset as was used in the fractional factorial project. To recap, this is the Housing dataset from the Ecdat package. Housing looks at the sales prices of houses in Windsor. It is a dataframe consiting of a cross-section from 1987 with 546 observations. The factors we will be focusing on can be seen below:
Two-Level Independent Variable : driveway - Does the house has a driveway ? Two-Level Independent Variable : fullbase - Does the house has a full finished basement ? Three-Level Independent Variable: stories - Stories number of stories excluding basement Three-Level Independent Variable: bathrms - Number of full bathrooms
library("Ecdat")
## Loading required package: Ecfun
##
## Attaching package: 'Ecfun'
## The following object is masked from 'package:base':
##
## sign
##
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
##
## Orange
data("Housing")
Below is the first 6 rows of data:
head(Housing)
## price lotsize bedrooms bathrms stories driveway recroom fullbase gashw
## 1 42000 5850 3 1 2 yes no yes no
## 2 38500 4000 2 1 1 yes no no no
## 3 49500 3060 3 1 1 yes no no no
## 4 60500 6650 3 1 2 yes yes no no
## 5 61000 6360 2 1 1 yes no no no
## 6 66000 4160 3 1 1 yes yes yes no
## airco garagepl prefarea
## 1 no 1 no
## 2 no 0 no
## 3 no 0 no
## 4 no 0 no
## 5 no 0 no
## 6 yes 0 no
Below is the summary of the Unemployment dataframe:
summary(Housing)
## price lotsize bedrooms bathrms
## Min. : 25000 Min. : 1650 Min. :1.000 Min. :1.000
## 1st Qu.: 49125 1st Qu.: 3600 1st Qu.:2.000 1st Qu.:1.000
## Median : 62000 Median : 4600 Median :3.000 Median :1.000
## Mean : 68122 Mean : 5150 Mean :2.965 Mean :1.286
## 3rd Qu.: 82000 3rd Qu.: 6360 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :190000 Max. :16200 Max. :6.000 Max. :4.000
## stories driveway recroom fullbase gashw airco
## Min. :1.000 no : 77 no :449 no :355 no :521 no :373
## 1st Qu.:1.000 yes:469 yes: 97 yes:191 yes: 25 yes:173
## Median :2.000
## Mean :1.808
## 3rd Qu.:2.000
## Max. :4.000
## garagepl prefarea
## Min. :0.0000 no :418
## 1st Qu.:0.0000 yes:128
## Median :0.0000
## Mean :0.6923
## 3rd Qu.:1.0000
## Max. :3.0000
For this project we needed 2 two-level factors and 2 three-factor factors. As you can see below, the two “three-level” factors that I chose were originally four-level factors.
In order to get a dataframe that would fulfill the requirements of this project, I created two subsets. First, I removed all the observations with 4 stories and then all the observations with 4 bathrooms, to finally end up with the dataframe I will use: Housing3.
Housing2 = subset(Housing, stories != "4")
Housing3 = subset(Housing2, bathrms != "4")
Now when we set these variables to factors and check the levels we should be able to see the updated amount of levels. During this step for some reason R keeps the amount of levels the same as before so it is important to use the “droplevels” function.
Housing3$bathrms <- as.factor(Housing3$bathrms)
Housing3$stories <- as.factor(Housing3$stories)
levels(droplevels(Housing3$bathrms))
## [1] "1" "2" "3"
levels(droplevels(Housing3$stories))
## [1] "1" "2" "3"
There are two continuos variables in this dataframe; the sale price of the house in dollars, price, and the lot size of the property in square feet, lotsize.
The response variable for this dataframe is the sale price of the house measured in dollars, price.
The dataframe is now going to be altered again so that it is condensed into only response variable price, and the independent variables bathrms, stories, driveway, fullbase.
Housing3$lotsize <- NULL
Housing3$bedrooms <- NULL
Housing3$recroom <- NULL
Housing3$gashw <- NULL
Housing3$airco <- NULL
Housing3$garagepl <- NULL
Housing3$prefarea <- NULL
head(droplevels(Housing3))
## price bathrms stories driveway fullbase
## 1 42000 1 2 yes yes
## 2 38500 1 1 yes no
## 3 49500 1 1 yes no
## 4 60500 1 2 yes no
## 5 61000 1 1 yes no
## 6 66000 1 1 yes yes
summary(droplevels(Housing3))
## price bathrms stories driveway fullbase
## Min. : 25000 1:390 1:227 no : 77 no :317
## 1st Qu.: 48000 2:106 2:238 yes:428 yes:188
## Median : 60000 3: 9 3: 40
## Mean : 65292
## 3rd Qu.: 77500
## Max. :190000
str(droplevels(Housing3))
## 'data.frame': 505 obs. of 5 variables:
## $ price : num 42000 38500 49500 60500 61000 66000 66000 69000 83800 90000 ...
## $ bathrms : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 2 1 1 2 ...
## $ stories : Factor w/ 3 levels "1","2","3": 2 1 1 2 1 1 2 3 1 1 ...
## $ driveway: Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ fullbase: Factor w/ 2 levels "no","yes": 2 1 1 1 1 2 2 1 2 2 ...
In order to use these factors for a factorial design, we needed to define the factors with levels of high and low. For the 2 three-level factors they are already set this way, but the 2 two-level factors need to be set below to have their levels altered so that yes = 1 and no = 0.
# Find length of frame
l = nrow(Housing3)
# Create four new frames for writing numbers instead of factors
drivewaynum = data.frame(l)
fullbasenum = data.frame(l)
storiesnum = data.frame(l)
bathrmsnum = data.frame(l)
## Replacement loop
for (i in 1:l){
# driveway replace yes with 1 and no with 0
if (Housing3$driveway[i] == "yes"){
drivewaynum[i,1] <- 1
} else {
drivewaynum[i,1] <- 0
}
# fullbase replace yes with 1 and no with 0
if (Housing3$fullbase[i] == "yes"){
fullbasenum[i,1] <- 1
} else {
fullbasenum[i,1] <- 0
}
#stories 1 = 0, 2 = 2, 3 = 2
if (Housing3$stories[i] == "1"){
storiesnum[i,1] <- 0
}
if (Housing3$stories[i] == "2") {
storiesnum[i,1] <- 1
}
if (Housing3$stories[i] == "3") {
storiesnum[i,1] <- 2
}
#bathrms 1 = 0, 2 = 1, 3 = 2
if (Housing3$bathrms[i] == "1"){
bathrmsnum[i,1] <- 0
}
if (Housing3$bathrms[i] == "2") {
bathrmsnum[i,1] <- 1
}
if (Housing3$bathrms[i] == "3") {
bathrmsnum[i,1] <- 2
}
}
# Create numbered data frame out of column vectors and response variable
Housing4 <- cbind(drivewaynum,fullbasenum,storiesnum,bathrmsnum,Housing3$price)
# Title columns of data frame appropriately
colnames(Housing4) <- c("driveway","full","stories","bathrooms","price")
head(Housing4)
## driveway full stories bathrooms price
## 1 1 1 1 0 42000
## 2 1 0 0 0 38500
## 3 1 0 0 0 49500
## 4 1 0 1 0 60500
## 5 1 0 0 0 61000
## 6 1 1 0 0 66000
str(Housing4)
## 'data.frame': 505 obs. of 5 variables:
## $ driveway : num 1 1 1 1 1 1 1 1 1 1 ...
## $ full : num 1 0 0 0 0 1 1 0 1 1 ...
## $ stories : num 1 0 0 1 0 0 1 2 0 0 ...
## $ bathrooms: num 0 0 0 0 0 0 1 0 0 1 ...
## $ price : num 42000 38500 49500 60500 61000 66000 66000 69000 83800 90000 ...
summary(Housing4)
## driveway full stories bathrooms
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.8475 Mean :0.3723 Mean :0.6297 Mean :0.2455
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :2.0000 Max. :2.0000
## price
## Min. : 25000
## 1st Qu.: 48000
## Median : 60000
## Mean : 65292
## 3rd Qu.: 77500
## Max. :190000
We will use a linear model in order to understand whether the variation in driveway, full, stories, and bathrooms effects the variation in the price of a house.
Null Hypothesis: No factor with have an effect on the price of a house.
Alternate Hypothesis: At least one factor will have an effect on the price of a house.
We will use a Response Surface Method in order to estimate residuals and coefficients. We will use an ANOA to analyze main effects of driveway, full, stories, and bathrooms.
Response Surface Methods are used for the goal of optimization, or to find the factor level combinations that will give us the best result. In this experiment we are looking to find what combination of driveway, full, stories, and bathrooms will give us the maximum price of a house.
There was not a randomization scheme used in this recipe
There were not replicates or repeated measures in this data set.
Originally the housing data set had 12 factors
Below you can see the summary of our data again, as well as boxplots of the response variable, price against the four IVs:
summary(Housing4)
## driveway full stories bathrooms
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.8475 Mean :0.3723 Mean :0.6297 Mean :0.2455
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :2.0000 Max. :2.0000
## price
## Min. : 25000
## 1st Qu.: 48000
## Median : 60000
## Mean : 65292
## 3rd Qu.: 77500
## Max. :190000
Below are boxplots to show the difference between levels of each factors on price.
boxplot(Housing4$price ~ Housing4$full + Housing4$driveway + Housing4$stories + Housing4$bathrooms, xlab= "fullbasement + driveway + stories + bathrooms",ylab="price of house", main="Histogram of Price and four IVs")
Below are some boxplots of the response variable against the four IV:
boxplot(Housing4$price~Housing4$driveway, main="Affect of Driveway on House Price", ylab="House Price", xlab="1= Driveway, 0 = No Driveway")
There is a noticable difference above in the median house price between factor levels.
boxplot(Housing4$price~Housing4$full, main="Affect of Full Basement on House Price", ylab="House Price", xlab="1= Full Basement, 0 = No Full Basement")
There is a noticible increase in the median price of a house with a full basement verses a house without a full basement.
boxplot(Housing4$price~Housing4$stories, main="Affect of Number of Stories on House Price", ylab="House Price", xlab="0 = 1 Story, 1 = Two Stories 2 = Three Stories")
The difference in prices between one and two stories is slight, but noticiable. The increase in house price when the numbers of stories increases to three stories is quite large.
boxplot(Housing4$price~Housing4$bathrooms, main="Affect of Number of Bathrooms on House Price", ylab="House Price", xlab="0 = 1 Bathrooms, 1 = Two Bathrooms 2 = Three Bathrooms")
It is clear to see the the median House Price increases as the amount of bathrooms increases.
library("rsm")
house.rsm = rsm(price ~ SO(driveway,full, stories, bathrooms), data=Housing4)
## Warning in rsm(price ~ SO(driveway, full, stories, bathrooms), data = Housing4): Some coefficients are aliased - cannot use 'rsm' methods.
## Returning an 'lm' object.
summary(house.rsm)
##
## Call:
## rsm(formula = price ~ SO(driveway, full, stories, bathrooms),
## data = Housing4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52871 -11403 -1741 8652 91259
##
## Coefficients: (2 not defined because of singularities)
## Estimate
## (Intercept) 40696.64
## FO(driveway, full, stories, bathrooms)driveway 12706.62
## FO(driveway, full, stories, bathrooms)full 15225.42
## FO(driveway, full, stories, bathrooms)stories 533.42
## FO(driveway, full, stories, bathrooms)bathrooms 7189.88
## TWI(driveway, full, stories, bathrooms)driveway:full 1618.92
## TWI(driveway, full, stories, bathrooms)driveway:stories -878.32
## TWI(driveway, full, stories, bathrooms)driveway:bathrooms 18140.16
## TWI(driveway, full, stories, bathrooms)full:stories -2453.94
## TWI(driveway, full, stories, bathrooms)full:bathrooms -10517.06
## TWI(driveway, full, stories, bathrooms)stories:bathrooms 3263.53
## PQ(driveway, full, stories, bathrooms)driveway^2 NA
## PQ(driveway, full, stories, bathrooms)full^2 NA
## PQ(driveway, full, stories, bathrooms)stories^2 4292.57
## PQ(driveway, full, stories, bathrooms)bathrooms^2 53.17
## Std. Error
## (Intercept) 3461.62
## FO(driveway, full, stories, bathrooms)driveway 3799.19
## FO(driveway, full, stories, bathrooms)full 5847.83
## FO(driveway, full, stories, bathrooms)stories 5518.50
## FO(driveway, full, stories, bathrooms)bathrooms 8634.27
## TWI(driveway, full, stories, bathrooms)driveway:full 5679.11
## TWI(driveway, full, stories, bathrooms)driveway:stories 4670.09
## TWI(driveway, full, stories, bathrooms)driveway:bathrooms 5591.54
## TWI(driveway, full, stories, bathrooms)full:stories 3525.06
## TWI(driveway, full, stories, bathrooms)full:bathrooms 4444.10
## TWI(driveway, full, stories, bathrooms)stories:bathrooms 3961.84
## PQ(driveway, full, stories, bathrooms)driveway^2 NA
## PQ(driveway, full, stories, bathrooms)full^2 NA
## PQ(driveway, full, stories, bathrooms)stories^2 2362.55
## PQ(driveway, full, stories, bathrooms)bathrooms^2 3999.28
## t value Pr(>|t|)
## (Intercept) 11.757 < 2e-16
## FO(driveway, full, stories, bathrooms)driveway 3.345 0.000887
## FO(driveway, full, stories, bathrooms)full 2.604 0.009504
## FO(driveway, full, stories, bathrooms)stories 0.097 0.923036
## FO(driveway, full, stories, bathrooms)bathrooms 0.833 0.405410
## TWI(driveway, full, stories, bathrooms)driveway:full 0.285 0.775714
## TWI(driveway, full, stories, bathrooms)driveway:stories -0.188 0.850896
## TWI(driveway, full, stories, bathrooms)driveway:bathrooms 3.244 0.001258
## TWI(driveway, full, stories, bathrooms)full:stories -0.696 0.486669
## TWI(driveway, full, stories, bathrooms)full:bathrooms -2.367 0.018342
## TWI(driveway, full, stories, bathrooms)stories:bathrooms 0.824 0.410485
## PQ(driveway, full, stories, bathrooms)driveway^2 NA NA
## PQ(driveway, full, stories, bathrooms)full^2 NA NA
## PQ(driveway, full, stories, bathrooms)stories^2 1.817 0.069836
## PQ(driveway, full, stories, bathrooms)bathrooms^2 0.013 0.989399
##
## (Intercept) ***
## FO(driveway, full, stories, bathrooms)driveway ***
## FO(driveway, full, stories, bathrooms)full **
## FO(driveway, full, stories, bathrooms)stories
## FO(driveway, full, stories, bathrooms)bathrooms
## TWI(driveway, full, stories, bathrooms)driveway:full
## TWI(driveway, full, stories, bathrooms)driveway:stories
## TWI(driveway, full, stories, bathrooms)driveway:bathrooms **
## TWI(driveway, full, stories, bathrooms)full:stories
## TWI(driveway, full, stories, bathrooms)full:bathrooms *
## TWI(driveway, full, stories, bathrooms)stories:bathrooms
## PQ(driveway, full, stories, bathrooms)driveway^2
## PQ(driveway, full, stories, bathrooms)full^2
## PQ(driveway, full, stories, bathrooms)stories^2 .
## PQ(driveway, full, stories, bathrooms)bathrooms^2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19990 on 492 degrees of freedom
## Multiple R-squared: 0.3786, Adjusted R-squared: 0.3635
## F-statistic: 24.99 on 12 and 492 DF, p-value: < 2.2e-16
Based on the values from the RSM model, we can reject the null hypothesis for first-order driveway, first-order full, and two-way interaction between driveway and bathrooms.
These contour plots ahow the price in response to two variables per plot:
#Contour Plots
par(mfrow=c(2,3))
contour(house.rsm, ~driveway + full + stories + bathrooms, image=TRUE, at=summary(house.rsm$canonical$xs))
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
These are perspective plots of the interactions:
#Perspective Plots
par(mfrow=c(1,1))
persp(house.rsm, ~ driveway + full, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(house.rsm, ~ driveway + stories, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(house.rsm, ~ driveway + bathrooms, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(house.rsm, ~ full + stories, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(house.rsm, ~ full + bathrooms, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(house.rsm, ~ stories + bathrooms, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
## Warning in predict.lm(lmobj, newdata = newdata): prediction from a rank-
## deficient fit may be misleading
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
We will use the Shapiro-Wilk test to test for normalcy. For this test the null-hypothesis is that the population is normally distributed. Therefore, if the p-value we get is less that alpha = .05, we will reject the null hypothesis and say that there is evidence that the data is not normally distributed.
#Normalcy Test
shapiro.test(Housing4$price)
##
## Shapiro-Wilk normality test
##
## data: Housing4$price
## W = 0.90553, p-value < 2.2e-16
shapiro.test(1/(Housing4$price))
##
## Shapiro-Wilk normality test
##
## data: 1/(Housing4$price)
## W = 0.95496, p-value = 2.724e-11
shapiro.test((Housing4$price)^2)
##
## Shapiro-Wilk normality test
##
## data: (Housing4$price)^2
## W = 0.72113, p-value < 2.2e-16
shapiro.test(sqrt(Housing4$price))
##
## Shapiro-Wilk normality test
##
## data: sqrt(Housing4$price)
## W = 0.96574, p-value = 1.816e-09
shapiro.test(log(Housing4$price))
##
## Shapiro-Wilk normality test
##
## data: log(Housing4$price)
## W = 0.99474, p-value = 0.08166
shapiro.test(log10(Housing4$price))
##
## Shapiro-Wilk normality test
##
## data: log10(Housing4$price)
## W = 0.99474, p-value = 0.08166
The results from these tests are that we fail to reject the null hypothesis for the log and log10 transformations. We cannot say that these two transformation will not result is a normal distribution.
We can also use a Q-Q plot to check for normalcy:
#Model Adequecy
qqnorm(residuals(house.rsm), main="Normal Q-Q Plot", ylab="Price")
qqline(residuals(house.rsm))
plot(fitted(house.rsm),residuals(house.rsm))
As you can see, the Q-Q plot is not perfect and the residuals plot has a lot of linearity and is not random or sporadic.
Due to the RSM analysis and the plots above, I fail to reject the null hypothesis. This data is not normally distributed and since that is one of the assumptions when completing at RSM analysis I cannot reject the null hypothesis.
#Read in unemployment data
library("Ecdat")
data(Housing)
#Summary of data
head(Housing)
summary(Housing)
#Make subsets to delete all rows with 4 stories or 4 bathrooms
Housing2 = subset(Housing, stories != "4")
Housing3 = subset(Housing2, bathrms != "4")
#Make bathrms and stories factors
Housing3$bathrms <- as.factor(Housing3$bathrms)
Housing3$stories <- as.factor(Housing3$stories)
#Drop old levels
levels(droplevels(Housing3$bathrms))
levels(droplevels(Housing3$stories))
#condense to have only 4 IV and the respnse
Housing3$lotsize <- NULL
Housing3$bedrooms <- NULL
Housing3$recroom <- NULL
Housing3$gashw <- NULL
Housing3$airco <- NULL
Housing3$garagepl <- NULL
Housing3$prefarea <- NULL
#Look at newly condensed dataframe
head(droplevels(Housing3))
summary(droplevels(Housing3))
str(droplevels(Housing3))
# Find length of frame
l = nrow(Housing3)
# Create four new frames for writing numbers instead of factors
drivewaynum = data.frame(l)
fullbasenum = data.frame(l)
storiesnum = data.frame(l)
bathrmsnum = data.frame(l)
## Replacement loop
for (i in 1:l){
# driveway replace yes with 1 and no with 0
if (Housing3$driveway[i] == "yes"){
drivewaynum[i,1] <- 1
} else {
drivewaynum[i,1] <- 0
}
# fullbase replace yes with 1 and no with 0
if (Housing3$fullbase[i] == "yes"){
fullbasenum[i,1] <- 1
} else {
fullbasenum[i,1] <- 0
}
#stories 1 = 0, 2 = 1, 3 = 2
if (Housing3$stories[i] == "1"){
storiesnum[i,1] <- 0
}
if (Housing3$stories[i] == "2") {
storiesnum[i,1] <- 1
}
if (Housing3$stories[i] == "3") {
storiesnum[i,1] <- 2
}
#bathrms 1 = 0, 2 = 1, 3 = 2
if (Housing3$bathrms[i] == "1"){
bathrmsnum[i,1] <- 0
}
if (Housing3$bathrms[i] == "2") {
bathrmsnum[i,1] <- 1
}
if (Housing3$bathrms[i] == "3") {
bathrmsnum[i,1] <- 2
}
}
# Create numbered data frame out of column vectors and response variable
Housing4 <- cbind(drivewaynum,fullbasenum,storiesnum,bathrmsnum,Housing3$price)
# Title columns of data frame appropriately
colnames(Housing4) <- c("driveway","full","stories","bathrooms","price")
head(Housing4)
#Summary of new dataframe
head(Housing4)
str(Housing4)
summary(Housing4)
#Boxplot of price and four IVs
boxplot(Housing4$price ~ Housing4$full + Housing4$driveway +
Housing4$stories + Housing4$bathrooms, xlab= "fullbasement +
driveway + stories + bathrooms",ylab="price of house",
main="Histogram of Price and four IVs")
#Install FrF2
install.packages("FrF2")
library("FrF2")
#64 Full Factorial Runs
f = FrF2(64,6)
print(f)
#FrF2
b <- FrF2(8,6,res3 = T)
print(b)
#Alias structure
aliasprint(b)
#Boxplots of Four IVS and response price
boxplot(Housing4$price~Housing4$driveway,
main="Affect of Driveway on House Price",
ylab="House Price", xlab="1= Driveway, 0 = No Driveway")
boxplot(Housing4$price~Housing4$full,
main="Affect of Full Basement on House Price",
ylab="House Price", xlab="1= Full Basement, 0 = No Full Basement")
boxplot(Housing4$price~Housing4$stories,
main="Affect of Number of Stories on House Price",
ylab="House Price", xlab="0 = 1 Story, 1 = Two Stories 2 = Three Stories")
boxplot(Housing4$price~Housing4$bathrooms,
main="Affect of Number of Bathrooms on House Price",
ylab="House Price", xlab="0 = 1 Bathrooms, 1 = Two Bathrooms 2 = Three Bathrooms")
#Interaction Effects and Main Effects Model
model1 = lm(Housing4$price ~ (Housing4$driveway + Housing4$full + Housing4$stories + Housing4$bathrooms)^2)
anova(model1)
#Main Effects Model
model2 = lm(Housing4$price ~ (Housing4$driveway + Housing4$full + Housing4$stories + Housing4$bathrooms))
anova(model2)
# Model Adequacy Checking
qqnorm(residuals(model2), main = "Normal Q-Q Plot")
qqline(residuals(model2))
plot(fitted(model2),residuals(model2))
str(Housing4)
#RSM
install.packages("rsm")
library("rsm")
house.rsm = rsm(price ~ SO(driveway,full, stories, bathrooms), data=Housing4)
summary(house.rsm)
#Contour Plots
par(mfrow=c(2,3))
contour(house.rsm, ~driveway + full + stories + bathrooms, image=TRUE, at=summary(house.rsm$canonical$xs))
#Perspective Plots
par(mfrow=c(1,1))
persp(house.rsm, ~ driveway + full, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
persp(house.rsm, ~ driveway + stories, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
persp(house.rsm, ~ driveway + bathrooms, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
persp(house.rsm, ~ full + stories, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
persp(house.rsm, ~ full + bathrooms, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
persp(house.rsm, ~ stories + bathrooms, image = TRUE,at = c(summary(house.rsm)$canonical$xs, Block="B2"),contour="colors",zlab="Price in Dollars",theta=30)
#Steepest
steepest(house.rsm)
canonical.path(house.rsm, dist = seq(-5, 5, by = 0.5))
#Normalcy Test
shapiro.test(Housing4$price)
shapiro.test(1/(Housing4$price))
shapiro.test((Housing4$price)^2)
shapiro.test(sqrt(Housing4$price))
shapiro.test(log(Housing4$price))
shapiro.test(log10(Housing4$price))
#Model Adequecy
qqnorm(residuals(house.rsm), main="Normal Q-Q Plot", ylab="Price")
qqline(residuals(house.rsm))
plot(fitted(house.rsm),residuals(house.rsm))