# Load the housing price data (first line of the CSV holds the column names)
# and display its first six rows.
housing <- read.csv(file = "housingprice.csv", header = TRUE)
head(x = housing, n = 6L)
## price driveway recroom fullbase gashw airco prefarea garagepl lotsize
## 1 30500 -1 -1 -1 -1 -1 -1 0 3000
## 2 42300 1 -1 -1 -1 -1 -1 0 3000
## 3 35000 -1 1 -1 -1 -1 -1 1 3240
## 4 54500 -1 -1 1 -1 -1 -1 0 3150
## 5 40500 -1 -1 -1 1 -1 -1 1 4350
## 6 44500 -1 -1 -1 -1 1 -1 0 3000
## bedrooms bathrms stories
## 1 2 1 1
## 2 2 1 2
## 3 2 1 1
## 4 2 2 1
## 5 3 1 2
## 6 3 1 1
# Show the type and leading values of every column.
str(object = housing)
## 'data.frame': 64 obs. of 12 variables:
## $ price : int 30500 42300 35000 54500 40500 44500 35500 35000 51000 56000 ...
## $ driveway: int -1 1 -1 -1 -1 -1 -1 1 1 1 ...
## $ recroom : int -1 -1 1 -1 -1 -1 -1 1 -1 -1 ...
## $ fullbase: int -1 -1 -1 1 -1 -1 -1 -1 1 -1 ...
## $ gashw : int -1 -1 -1 -1 1 -1 -1 -1 -1 1 ...
## $ airco : int -1 -1 -1 -1 -1 1 -1 -1 -1 -1 ...
## $ prefarea: int -1 -1 -1 -1 -1 -1 1 -1 -1 -1 ...
## $ garagepl: int 0 0 1 0 1 0 0 0 0 1 ...
## $ lotsize : int 3000 3000 3240 3150 4350 3000 3000 3500 3150 3290 ...
## $ bedrooms: int 2 2 2 2 3 3 3 2 3 2 ...
## $ bathrms : int 1 1 1 2 1 1 1 1 1 1 ...
## $ stories : int 1 2 1 1 2 1 2 1 2 1 ...
# Per-column summary statistics.
summary(object = housing)
## price driveway recroom fullbase gashw
## Min. : 25000 Min. :-1.0000 Min. :-1 Min. :-1 Min. :-1
## 1st Qu.: 41625 1st Qu.:-1.0000 1st Qu.:-1 1st Qu.:-1 1st Qu.:-1
## Median : 51500 Median : 1.0000 Median : 0 Median : 0 Median : 0
## Mean : 54527 Mean : 0.0625 Mean : 0 Mean : 0 Mean : 0
## 3rd Qu.: 62000 3rd Qu.: 1.0000 3rd Qu.: 1 3rd Qu.: 1 3rd Qu.: 1
## Max. :138300 Max. : 1.0000 Max. : 1 Max. : 1 Max. : 1
## airco prefarea garagepl lotsize bedrooms
## Min. :-1 Min. :-1 Min. :0.000 Min. :1905 Min. :2.00
## 1st Qu.:-1 1st Qu.:-1 1st Qu.:0.000 1st Qu.:3000 1st Qu.:2.00
## Median : 0 Median : 0 Median :0.000 Median :3415 Median :3.00
## Mean : 0 Mean : 0 Mean :0.453 Mean :3654 Mean :2.92
## 3rd Qu.: 1 3rd Qu.: 1 3rd Qu.:1.000 3rd Qu.:4081 3rd Qu.:3.00
## Max. : 1 Max. : 1 Max. :2.000 Max. :7686 Max. :5.00
## bathrms stories
## Min. :1.00 Min. :1.00
## 1st Qu.:1.00 1st Qu.:1.00
## Median :1.00 Median :2.00
## Mean :1.30 Mean :1.62
## 3rd Qu.:1.25 3rd Qu.:2.00
## Max. :3.00 Max. :3.00
# Factors in dataset
# The six indicator columns are coded -1 (no) / 1 (yes). Store them as real
# factors with explicit levels, so that -1 is always the reference level in
# the models below, instead of as character vectors that lm() would have to
# convert implicitly.
indicator_cols <- c(
  "driveway", "recroom", "fullbase", "gashw", "airco", "prefarea"
)
housing[indicator_cols] <- lapply(
  housing[indicator_cols],
  factor,
  levels = c(-1, 1)
)
# Boxplot
# One boxplot of price per indicator factor, laid out in a 2 x 3 grid.
# par() returns the previous settings; they are restored afterwards so that
# plots drawn later in the script are not squeezed into the same grid.
boxplot_factors <- c(
  "driveway", "recroom", "fullbase", "gashw", "airco", "prefarea"
)
old_par <- par(mfrow = c(2, 3))
for (fac in boxplot_factors) {
  boxplot(
    housing$price ~ housing[[fac]],
    xlab = paste0(fac, " (1=yes,-1=no)"),
    ylab = "housing price"
  )
}
par(old_par)
# Analysis of variance
# Fit the main-effects linear model of price on the six indicator factors and
# print its analysis-of-variance table.
# NOTE(review): the fitted model is bound to the name `aov`, which is also the
# name of a modelling function; the name is kept because other code may use it.
aov <- lm(
  formula = price ~ driveway + recroom + fullbase + gashw + airco + prefarea,
  data = housing
)
anova(aov)
## Analysis of Variance Table
##
## Response: price
## Df Sum Sq Mean Sq F value Pr(>F)
## driveway 1 2.16e+09 2.16e+09 7.20 0.0095 **
## recroom 1 1.36e+09 1.36e+09 4.54 0.0375 *
## fullbase 1 1.91e+08 1.91e+08 0.64 0.4279
## gashw 1 2.09e+09 2.09e+09 6.98 0.0106 *
## airco 1 8.58e+07 8.58e+07 0.29 0.5949
## prefarea 1 1.17e+09 1.17e+09 3.90 0.0533 .
## Residuals 57 1.71e+10 3.00e+08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Summary of linear model
# Same main-effects model as above, kept under its own name so that the
# coefficient table, confidence intervals and diagnostics can refer to it.
summodel <- lm(
  formula = price ~ driveway + recroom + fullbase + gashw + airco + prefarea,
  data = housing
)
summary(object = summodel)
##
## Call:
## lm(formula = price ~ driveway + recroom + fullbase + gashw +
## airco + prefarea, data = housing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26545 -10533 -1595 8219 61315
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31408 5735 5.48 1e-06 ***
## driveway1 10559 4365 2.42 0.019 *
## recroom1 9274 4339 2.14 0.037 *
## fullbase1 3419 4339 0.79 0.434
## gashw1 11462 4339 2.64 0.011 *
## airco1 2316 4330 0.53 0.595
## prefarea1 8547 4330 1.97 0.053 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17300 on 57 degrees of freedom
## Multiple R-squared: 0.292, Adjusted R-squared: 0.218
## F-statistic: 3.92 on 6 and 57 DF, p-value: 0.00238
# 95% confidence intervals for the coefficients of the main-effects model.
confint(object = summodel, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 19923.9 42892
## driveway1 1818.7 19299
## recroom1 585.8 17963
## fullbase1 -5269.4 12108
## gashw1 2773.3 20151
## airco1 -6355.8 10987
## prefarea1 -124.6 17218
# Normal Q-Q plot of the main-effects model residuals, with reference line
qqnorm(y = resid(summodel))
qqline(y = resid(summodel))
# Residuals against fitted values (expressions kept inline so the default
# axis labels stay the same)
plot(x = fitted(summodel), y = residuals(summodel))
library(FrF2)
## Warning: package 'FrF2' was built under R version 3.1.2
## Loading required package: DoE.base
## Loading required package: grid
## Loading required package: conf.design
##
## Attaching package: 'DoE.base'
##
## The following objects are masked from 'package:stats':
##
## aov, lm
##
## The following object is masked from 'package:graphics':
##
## plot.design
# Build a 32-run two-level design in the six indicator factors, requesting
# that all main effects and every two-factor interaction involving driveway
# be estimable, then print it.
# NOTE(review): `res4` and `data` are passed through unchanged; confirm that
# FrF2() actually uses them.
# NOTE(review): the result is bound to the name `matrix`, which is also the
# name of a base function; the name is kept because later statements use it.
matrix <- FrF2(
  32,
  nfactors = 6,
  estimable = formula("~driveway+recroom+fullbase+gashw+airco+prefarea+driveway:(recroom+fullbase+gashw+airco+prefarea)"),
  factor.names = c("driveway", "recroom", "fullbase", "gashw", "airco", "prefarea"),
  res4 = TRUE,
  clear = FALSE,
  data = housing
)
matrix
## driveway recroom fullbase gashw airco prefarea
## 1 1 -1 -1 1 1 1
## 2 1 1 -1 1 1 -1
## 3 1 -1 1 -1 -1 -1
## 4 1 1 -1 1 -1 1
## 5 -1 1 -1 -1 1 -1
## 6 -1 -1 -1 -1 -1 -1
## 7 1 -1 -1 -1 1 -1
## 8 -1 -1 1 -1 1 -1
## 9 1 1 1 -1 1 -1
## 10 1 -1 1 -1 1 1
## 11 -1 -1 1 1 1 1
## 12 -1 1 1 -1 1 1
## 13 -1 1 -1 1 1 1
## 14 1 -1 -1 -1 -1 1
## 15 1 1 1 1 -1 -1
## 16 1 1 1 1 1 1
## 17 -1 1 -1 1 -1 -1
## 18 -1 1 1 -1 -1 -1
## 19 1 1 1 -1 -1 1
## 20 1 -1 1 1 1 -1
## 21 -1 1 1 1 -1 1
## 22 -1 1 1 1 1 -1
## 23 -1 -1 1 1 -1 -1
## 24 1 1 -1 -1 -1 -1
## 25 -1 -1 -1 -1 1 1
## 26 -1 -1 -1 1 -1 1
## 27 1 1 -1 -1 1 1
## 28 1 -1 1 1 -1 1
## 29 -1 1 -1 -1 -1 1
## 30 1 -1 -1 1 -1 -1
## 31 -1 -1 1 -1 -1 1
## 32 -1 -1 -1 1 1 -1
## class=design, type= FrF2.estimable
# Print the alias structure of the design; the output below reports no
# aliasing among main effects and two-factor interactions.
aliasprint(matrix)
## $legend
## [1] A=driveway B=recroom C=fullbase D=gashw E=airco F=prefarea
##
## [[2]]
## [1] no aliasing among main effects and 2fis
# Match every design run to the houses that have the same six factor
# settings; all = FALSE keeps only rows present in both tables. Every
# matching house is returned, so a run can appear more than once.
sample <- merge(
  x = matrix,
  y = housing,
  by = c("driveway", "recroom", "fullbase", "gashw", "airco", "prefarea"),
  all = FALSE
)
sample
## driveway recroom fullbase gashw airco prefarea price garagepl lotsize
## 1 -1 -1 -1 -1 -1 -1 30500 0 3000
## 2 -1 -1 -1 -1 1 1 61000 0 2175
## 3 -1 -1 -1 1 -1 1 44000 1 4500
## 4 -1 -1 -1 1 1 -1 51000 0 4500
## 5 -1 -1 1 -1 -1 1 51000 2 4500
## 6 -1 -1 1 -1 1 -1 40000 1 2650
## 7 -1 -1 1 1 -1 -1 56000 0 3000
## 8 -1 -1 1 1 1 1 38000 0 2800
## 9 -1 1 -1 -1 -1 1 31900 0 5300
## 10 -1 1 -1 -1 1 -1 35000 1 3240
## 11 -1 1 -1 1 -1 -1 38000 0 3630
## 12 -1 1 -1 1 1 1 46000 1 2684
## 13 -1 1 1 -1 -1 -1 72000 0 3540
## 14 -1 1 1 -1 1 1 48000 0 3100
## 15 -1 1 1 1 -1 1 52500 0 3630
## 16 -1 1 1 1 1 -1 32000 0 1950
## 17 1 -1 -1 -1 -1 1 62900 0 2880
## 18 1 -1 -1 -1 1 -1 37900 0 3185
## 19 1 -1 -1 1 -1 -1 56000 1 3290
## 20 1 -1 -1 1 1 1 38000 0 2430
## 21 1 -1 1 -1 -1 -1 51000 0 3150
## 22 1 -1 1 -1 1 1 46000 1 4320
## 23 1 -1 1 1 -1 1 50000 0 3036
## 24 1 -1 1 1 1 -1 57500 2 3630
## 25 1 1 -1 -1 -1 -1 35000 0 3500
## 26 1 1 -1 -1 1 1 70000 0 5400
## 27 1 1 -1 1 -1 1 69900 2 3420
## 28 1 1 -1 1 -1 1 130000 2 6000
## 29 1 1 -1 1 1 -1 63900 1 3162
## 30 1 1 -1 1 1 -1 74500 2 3180
## 31 1 1 1 -1 -1 1 42000 0 3660
## 32 1 1 1 -1 1 -1 46500 0 3930
## 33 1 1 1 1 -1 -1 52000 0 3570
## 34 1 1 1 1 1 1 138300 2 6000
## bedrooms bathrms stories
## 1 2 1 1
## 2 3 1 2
## 3 2 1 2
## 4 2 1 1
## 5 4 2 2
## 6 3 1 2
## 7 3 1 2
## 8 3 1 1
## 9 3 1 1
## 10 2 1 1
## 11 3 3 2
## 12 2 1 1
## 13 2 1 1
## 14 3 1 2
## 15 2 1 1
## 16 3 1 1
## 17 3 1 2
## 18 2 1 1
## 19 2 1 1
## 20 3 1 1
## 21 3 1 2
## 22 3 1 1
## 23 3 1 2
## 24 3 2 2
## 25 2 1 1
## 26 4 1 2
## 27 4 2 2
## 28 4 1 2
## 29 3 1 2
## 30 3 2 2
## 31 4 1 2
## 32 2 1 1
## 33 3 1 2
## 34 4 3 2
# The design has 32 runs, but the merge above returned 34 rows: two factor
# combinations each match two houses (rows 27-28 and 29-30 of the printed
# sample). Keep only the first house for each combination so that every
# design run contributes exactly one observation.
design_factors <- c(
  "driveway", "recroom", "fullbase", "gashw", "airco", "prefarea"
)
sample <- sample[!duplicated(sample[, design_factors]), ]
# Main effects plus all two-factor interactions with driveway; this is the
# same set of terms, in the same order, as writing each driveway*x product
# out separately.
# NOTE: the table printed below was produced from the 34-row sample
# (22 residual df) and should be regenerated after this change.
aovsample <- lm(
  formula = price ~ driveway * (recroom + fullbase + gashw + airco + prefarea),
  data = sample
)
anova(aovsample)
## Analysis of Variance Table
##
## Response: price
## Df Sum Sq Mean Sq F value Pr(>F)
## driveway 1 2.41e+09 2.41e+09 5.54 0.028 *
## recroom 1 9.82e+08 9.82e+08 2.26 0.147
## fullbase 1 2.94e+07 2.94e+07 0.07 0.797
## gashw 1 1.12e+09 1.12e+09 2.58 0.122
## airco 1 3.56e+04 3.56e+04 0.00 0.993
## prefarea 1 1.07e+09 1.07e+09 2.46 0.131
## driveway:recroom 1 1.16e+09 1.16e+09 2.66 0.117
## driveway:fullbase 1 9.40e+07 9.40e+07 0.22 0.647
## driveway:gashw 1 1.17e+09 1.17e+09 2.68 0.116
## driveway:airco 1 1.04e+08 1.04e+08 0.24 0.629
## driveway:prefarea 1 6.45e+08 6.45e+08 1.48 0.236
## Residuals 22 9.57e+09 4.35e+08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Shapiro-Wilk normality test
# The normality assumption of the linear model concerns its errors, so the
# test is applied to the residuals of aovsample (the same quantity shown in
# the Q-Q plot that follows) rather than to the raw price column.
# NOTE: the output printed below was produced from sample$price and should be
# regenerated after this change.
shapiro.test(residuals(aovsample))
##
## Shapiro-Wilk normality test
##
## data: sample$price
## W = 0.7431, p-value = 2.421e-06
# Normal Q-Q plot of the design-subset model residuals, with reference line
qqnorm(residuals(aovsample))
qqline(residuals(aovsample))
# Residuals against fitted values
plot(fitted(aovsample), residuals(aovsample))