# Use a 100-column console so the wide summary and correlation tables
# printed by this script wrap as little as possible.
options(width = 100)
# Insert your R script here
library(abind, pos=26)
library(e1071, pos=27)
library(car)
# library(Rcmdr)
library(RcmdrMisc)
# IMPORTANT
# Set `data_dir` below to the folder on your computer that holds a local copy
# of the data file. The file is located with file.path() rather than setwd(),
# so running this script does not change the session's working directory.
######cycle1
data_dir <- "C:/Users/Noura/Desktop/regresstions"
data_file <- file.path(data_dir, "Appraise10.csv")
# Stop early with a message that names the file we looked for.
if (!file.exists(data_file)) {
  stop("Data file not found: ", data_file,
       " - edit `data_dir` to point to your local copy.", call. = FALSE)
}
# Comma-separated file with a header row; the string "NA" marks missing values
# and surrounding white space is stripped from fields.
Appraise <- read.table(data_file, header = TRUE, sep = ",",
                       na.strings = "NA", dec = ".", strip.white = TRUE)
# Descriptive statistics (mean, sd, skewness and the 0/25/50/75/100 percent
# quantiles) for Price and all candidate predictors.
numSummary(
  Appraise[c(
    "Price", "Age", "AttGar", "Baths", "BdRms", "Central",
    "East", "Rooms", "SqFt", "View", "West"
  )],
  statistics = c("mean", "sd", "quantiles", "skewness"),
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  type = "2"
)
## mean sd skewness 0% 25% 50% 75% 100% n
## Price 276.7200 78.1093007 1.19116525 162.0 226.000 262.000 312.25 559.0 50
## Age 11.4000 4.9321933 0.54153012 2.0 8.000 11.000 14.50 25.0 50
## AttGar 0.5200 0.5046720 -0.08256187 0.0 0.000 1.000 1.00 1.0 50
## Baths 1.8200 0.5225526 -0.22097460 1.0 2.000 2.000 2.00 3.0 50
## BdRms 2.9200 0.6006799 0.61487641 2.0 3.000 3.000 3.00 5.0 50
## Central 0.4800 0.5046720 0.08256187 0.0 0.000 0.000 1.00 1.0 50
## East 0.2200 0.4184520 1.39402701 0.0 0.000 0.000 0.00 1.0 50
## Rooms 7.1800 1.0631106 1.74810852 5.0 7.000 7.000 7.75 12.0 50
## SqFt 1.8964 0.5806005 0.02711705 0.8 1.525 1.945 2.24 3.3 50
## View 0.1000 0.3030458 2.74985970 0.0 0.000 0.000 0.00 1.0 50
## West 0.3000 0.4629100 0.90010287 0.0 0.000 0.000 1.00 1.0 50
# Correlation matrix of Price and all candidate predictors, computed on the
# rows that have no missing value in any of the listed columns.
cor(
  Appraise[c(
    "Price", "Age", "AttGar", "Baths", "BdRms", "Central",
    "East", "Rooms", "SqFt", "View", "West"
  )],
  use = "complete.obs"
)
## Price Age AttGar Baths BdRms Central East
## Price 1.0000000 0.44172728 0.53753458 0.50374133 0.530174739 0.144297913 -0.324008181
## Age 0.4417273 1.00000000 0.22628911 0.28981102 0.334778078 -0.037714852 -0.053396401
## AttGar 0.5375346 0.22628911 1.00000000 0.20739556 0.274670324 0.201923077 -0.166217793
## Baths 0.5037413 0.28981102 0.20739556 1.00000000 0.343292832 0.024763648 -0.095198142
## BdRms 0.5301747 0.33477808 0.27467032 0.34329283 1.000000000 -0.005385693 -0.009743085
## Central 0.1442979 -0.03771485 0.20192308 0.02476365 -0.005385693 1.000000000 -0.510249968
## East -0.3240082 -0.05339640 -0.16621779 -0.09519814 -0.009743085 -0.510249968 1.000000000
## Rooms 0.7944452 0.47250247 0.43058873 0.72076630 0.630215825 0.025865754 -0.136708697
## SqFt 0.8906853 0.47251220 0.48292092 0.55209353 0.437451041 0.128600610 -0.208354367
## View 0.3840106 -0.10923092 0.18681618 -0.01288741 -0.067267279 0.213504205 -0.177028335
## West 0.1355744 0.08938553 -0.06988566 0.05905754 0.014678924 -0.628970902 -0.347676748
## Rooms SqFt View West
## Price 0.79444522 0.89068534 0.38401056 0.13557436
## Age 0.47250247 0.47251220 -0.10923092 0.08938553
## AttGar 0.43058873 0.48292092 0.18681618 -0.06988566
## Baths 0.72076630 0.55209353 -0.01288741 0.05905754
## BdRms 0.63021583 0.43745104 -0.06726728 0.01467892
## Central 0.02586575 0.12860061 0.21350421 -0.62897090
## East -0.13670870 -0.20835437 -0.17702833 -0.34767675
## Rooms 1.00000000 0.66796087 0.06968029 0.09537987
## SqFt 0.66796087 1.00000000 0.14011519 0.04814145
## View 0.06968029 0.14011519 1.00000000 -0.07273930
## West 0.09537987 0.04814145 -0.07273930 1.00000000
# Scatterplot matrix of Price against selected predictors: points only (no
# regression line, smoother, data ellipse or point labels), with a kernel
# density estimate on the diagonal.
# Written for the car >= 3.0 interface. The car 2.x argument names
# (reg.line, spread, span, levels, id.n, diagonal = 'density') are no longer
# recognised there, so the plot would not match the intent stated above.
scatterplotMatrix(
  ~ Price + Age + Baths + BdRms + Rooms + SqFt + View + West,
  regLine = FALSE, smooth = FALSE, ellipse = FALSE, id = FALSE,
  diagonal = list(method = "density"),
  data = Appraise
)

# Cycle 1 full model: Price regressed on every candidate predictor.
# Central is not in the formula while East and West are; presumably Central
# serves as the baseline location (confirm against the data dictionary).
Full <- lm(
  formula = Price ~ Age + AttGar + Baths + BdRms + East + Rooms +
    SqFt + View + West,
  data = Appraise
)
summary(Full)
##
## Call:
## lm(formula = Price ~ Age + AttGar + Baths + BdRms + East + Rooms +
## SqFt + View + West, data = Appraise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.211 -7.620 -2.309 3.620 37.911
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -91.0526 15.5667 -5.849 7.70e-07 ***
## Age -0.1321 0.4948 -0.267 0.79079
## AttGar 3.1798 4.8514 0.655 0.51594
## Baths -26.7888 6.0270 -4.445 6.81e-05 ***
## BdRms 8.1108 4.5297 1.791 0.08093 .
## East -15.4459 5.4071 -2.857 0.00676 **
## Rooms 32.2002 3.9236 8.207 4.16e-10 ***
## SqFt 81.7107 5.2852 15.460 < 2e-16 ***
## View 65.8995 7.1897 9.166 2.24e-11 ***
## West 11.1722 4.7879 2.333 0.02474 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.13 on 40 degrees of freedom
## Multiple R-squared: 0.9733, Adjusted R-squared: 0.9673
## F-statistic: 161.8 on 9 and 40 DF, p-value: < 2.2e-16
# Diagnostic plots for the full model, laid out 2 x 2 with a 3-line outer
# top margin. par() returns the previous settings, which are restored after
# plotting.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(Full)

par(oldpar)
# A single new case used for the predictions in this script.
.NewData <- data.frame(
  Age = 10, AttGar = 1, Baths = 2.5, BdRms = 3, Central = 0,
  East = 0, Rooms = 8, SqFt = 2, View = 0, West = 1,
  row.names = "1"
)
.NewData # Newdata
## Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1 10 1 2.5 3 0 0 8 2 0 1
# 95% prediction interval for the new case under the full model.
predict(
  Full,
  newdata = .NewData,
  interval = "prediction",
  level = 0.95,
  se.fit = FALSE
)
## fit lwr upr
## 1 300.3619 269.7245 330.9994
#####cycle2
# Backward/forward stepwise selection starting from the full model, using BIC.
# The trace recorded below labels the criterion column "AIC" even though BIC
# was requested.
Reduced <- stepwise(
  Full,
  direction = "backward/forward",
  criterion = "BIC"
)
##
## Direction: backward/forward
## Criterion: BIC
##
## Start: AIC=292.82
## Price ~ Age + AttGar + Baths + BdRms + East + Rooms + SqFt +
## View + West
##
## Df Sum of Sq RSS AIC
## - Age 1 14 8005 289.00
## - AttGar 1 86 8077 289.45
## - BdRms 1 641 8632 292.77
## <none> 7991 292.82
## - West 1 1088 9079 295.29
## - East 1 1630 9621 298.19
## - Baths 1 3947 11938 308.98
## - Rooms 1 13455 21446 338.27
## - View 1 16784 24775 345.49
## - SqFt 1 47752 55743 386.03
##
## Step: AIC=289
## Price ~ AttGar + Baths + BdRms + East + Rooms + SqFt + View +
## West
##
## Df Sum of Sq RSS AIC
## - AttGar 1 89 8094 285.64
## - BdRms 1 644 8649 288.96
## <none> 8005 289.00
## - West 1 1079 9084 291.41
## + Age 1 14 7991 292.82
## - East 1 1645 9650 294.43
## - Baths 1 3965 11971 305.21
## - Rooms 1 14062 22068 335.79
## - View 1 17659 25664 343.34
## - SqFt 1 51517 59522 385.40
##
## Step: AIC=285.64
## Price ~ Baths + BdRms + East + Rooms + SqFt + View + West
##
## Df Sum of Sq RSS AIC
## - BdRms 1 629 8723 285.47
## <none> 8094 285.64
## - West 1 1010 9104 287.61
## + AttGar 1 89 8005 289.00
## + Age 1 17 8077 289.45
## - East 1 1746 9840 291.50
## - Baths 1 4411 12505 303.48
## - Rooms 1 15555 23649 335.34
## - View 1 18015 26109 340.28
## - SqFt 1 57674 65768 386.48
##
## Step: AIC=285.47
## Price ~ Baths + East + Rooms + SqFt + View + West
##
## Df Sum of Sq RSS AIC
## <none> 8723 285.47
## + BdRms 1 629 8094 285.64
## - West 1 936 9659 286.65
## + AttGar 1 74 8649 288.96
## + Age 1 20 8702 289.26
## - East 1 1618 10341 290.06
## - Baths 1 5535 14258 306.12
## - View 1 17393 26115 336.38
## - Rooms 1 26876 35599 351.87
## - SqFt 1 59308 68031 384.26
# Coefficient table and fit statistics for the model returned by the stepwise
# search and stored in `Reduced`.
summary(Reduced)
##
## Call:
## lm(formula = Price ~ Baths + East + Rooms + SqFt + View + West,
## data = Appraise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.109 -7.486 -0.972 2.737 36.784
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -91.907 15.001 -6.127 2.39e-07 ***
## Baths -29.810 5.707 -5.224 4.85e-06 ***
## East -15.256 5.402 -2.824 0.00716 **
## Rooms 36.093 3.136 11.510 1.02e-14 ***
## SqFt 83.087 4.859 17.099 < 2e-16 ***
## View 64.595 6.976 9.260 8.45e-12 ***
## West 10.222 4.758 2.148 0.03735 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.24 on 43 degrees of freedom
## Multiple R-squared: 0.9708, Adjusted R-squared: 0.9668
## F-statistic: 238.5 on 6 and 43 DF, p-value: < 2.2e-16
# Diagnostic plots for the reduced model in a 2 x 2 grid; the previous
# graphics settings are saved and restored afterwards.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(Reduced)

par(oldpar)
# Show the new case again before predicting with the reduced model.
.NewData # Newdata
## Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1 10 1 2.5 3 0 0 8 2 0 1
# 95% prediction interval for the new case under the reduced model.
predict(
  Reduced,
  newdata = .NewData,
  interval = "prediction",
  level = 0.95,
  se.fit = FALSE
)
## fit lwr upr
## 1 298.7051 268.494 328.9161
######## Cycle 3
# Add the natural log of Price as a new response column.
# NOTE(review): the column is spelled "InPrice" (capital I); presumably
# "lnPrice" was intended. The name is kept because later code refers to it.
Appraise[["InPrice"]] <- log(Appraise[["Price"]])
# Same descriptive statistics as in cycle 1, with InPrice in place of Price.
numSummary(
  Appraise[c(
    "InPrice", "Age", "AttGar", "Baths", "BdRms", "Central",
    "East", "Rooms", "SqFt", "View", "West"
  )],
  statistics = c("mean", "sd", "quantiles", "skewness"),
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  type = "2"
)
## mean sd skewness 0% 25% 50% 75% 100% n
## InPrice 5.587323 0.2661106 0.33625981 5.087596 5.420535 5.568337 5.743757 6.326149 50
## Age 11.400000 4.9321933 0.54153012 2.000000 8.000000 11.000000 14.500000 25.000000 50
## AttGar 0.520000 0.5046720 -0.08256187 0.000000 0.000000 1.000000 1.000000 1.000000 50
## Baths 1.820000 0.5225526 -0.22097460 1.000000 2.000000 2.000000 2.000000 3.000000 50
## BdRms 2.920000 0.6006799 0.61487641 2.000000 3.000000 3.000000 3.000000 5.000000 50
## Central 0.480000 0.5046720 0.08256187 0.000000 0.000000 0.000000 1.000000 1.000000 50
## East 0.220000 0.4184520 1.39402701 0.000000 0.000000 0.000000 0.000000 1.000000 50
## Rooms 7.180000 1.0631106 1.74810852 5.000000 7.000000 7.000000 7.750000 12.000000 50
## SqFt 1.896400 0.5806005 0.02711705 0.800000 1.525000 1.945000 2.240000 3.300000 50
## View 0.100000 0.3030458 2.74985970 0.000000 0.000000 0.000000 0.000000 1.000000 50
## West 0.300000 0.4629100 0.90010287 0.000000 0.000000 0.000000 1.000000 1.000000 50
# Correlation matrix of InPrice and all candidate predictors, computed on the
# rows that have no missing value in any of the listed columns.
cor(
  Appraise[c(
    "InPrice", "Age", "AttGar", "Baths", "BdRms", "Central",
    "East", "Rooms", "SqFt", "View", "West"
  )],
  use = "complete.obs"
)
## InPrice Age AttGar Baths BdRms Central East
## InPrice 1.0000000 0.43462221 0.57838257 0.50806017 0.485231606 0.143541330 -0.338083936
## Age 0.4346222 1.00000000 0.22628911 0.28981102 0.334778078 -0.037714852 -0.053396401
## AttGar 0.5783826 0.22628911 1.00000000 0.20739556 0.274670324 0.201923077 -0.166217793
## Baths 0.5080602 0.28981102 0.20739556 1.00000000 0.343292832 0.024763648 -0.095198142
## BdRms 0.4852316 0.33477808 0.27467032 0.34329283 1.000000000 -0.005385693 -0.009743085
## Central 0.1435413 -0.03771485 0.20192308 0.02476365 -0.005385693 1.000000000 -0.510249968
## East -0.3380839 -0.05339640 -0.16621779 -0.09519814 -0.009743085 -0.510249968 1.000000000
## Rooms 0.7587326 0.47250247 0.43058873 0.72076630 0.630215825 0.025865754 -0.136708697
## SqFt 0.9238494 0.47251220 0.48292092 0.55209353 0.437451041 0.128600610 -0.208354367
## View 0.3892219 -0.10923092 0.18681618 -0.01288741 -0.067267279 0.213504205 -0.177028335
## West 0.1491231 0.08938553 -0.06988566 0.05905754 0.014678924 -0.628970902 -0.347676748
## Rooms SqFt View West
## InPrice 0.75873257 0.92384943 0.38922188 0.14912311
## Age 0.47250247 0.47251220 -0.10923092 0.08938553
## AttGar 0.43058873 0.48292092 0.18681618 -0.06988566
## Baths 0.72076630 0.55209353 -0.01288741 0.05905754
## BdRms 0.63021583 0.43745104 -0.06726728 0.01467892
## Central 0.02586575 0.12860061 0.21350421 -0.62897090
## East -0.13670870 -0.20835437 -0.17702833 -0.34767675
## Rooms 1.00000000 0.66796087 0.06968029 0.09537987
## SqFt 0.66796087 1.00000000 0.14011519 0.04814145
## View 0.06968029 0.14011519 1.00000000 -0.07273930
## West 0.09537987 0.04814145 -0.07273930 1.00000000
# Scatterplot matrix of InPrice against the predictors listed in the formula:
# points only (no regression line, smoother, data ellipse or point labels),
# with a kernel density estimate on the diagonal.
# Written for the car >= 3.0 interface. The car 2.x argument names
# (reg.line, spread, span, levels, id.n, diagonal = 'density') are no longer
# recognised there, so the plot would not match the intent stated above.
scatterplotMatrix(
  ~ InPrice + Baths + East + Rooms + SqFt + View + West,
  regLine = FALSE, smooth = FALSE, ellipse = FALSE, id = FALSE,
  diagonal = list(method = "density"),
  data = Appraise
)

# Cycle 3 full model: InPrice regressed on the same nine predictors as the
# cycle 1 full model.
logFull <- lm(
  formula = InPrice ~ Age + AttGar + Baths + BdRms + East + Rooms +
    SqFt + View + West,
  data = Appraise
)
summary(logFull)
##
## Call:
## lm(formula = InPrice ~ Age + AttGar + Baths + BdRms + East +
## Rooms + SqFt + View + West, data = Appraise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.036027 -0.009418 0.002833 0.011185 0.041543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4783498 0.0215045 208.252 < 2e-16 ***
## Age -0.0005976 0.0006835 -0.874 0.3871
## AttGar 0.0443142 0.0067020 6.612 6.53e-08 ***
## Baths -0.0620683 0.0083260 -7.455 4.40e-09 ***
## BdRms 0.0147703 0.0062576 2.360 0.0232 *
## East -0.0481231 0.0074696 -6.443 1.13e-07 ***
## Rooms 0.0751194 0.0054203 13.859 < 2e-16 ***
## SqFt 0.3142207 0.0073012 43.037 < 2e-16 ***
## View 0.2189815 0.0099322 22.048 < 2e-16 ***
## West 0.0534033 0.0066143 8.074 6.28e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01953 on 40 degrees of freedom
## Multiple R-squared: 0.9956, Adjusted R-squared: 0.9946
## F-statistic: 1007 on 9 and 40 DF, p-value: < 2.2e-16
# Diagnostic plots for the log-response full model in a 2 x 2 grid; the
# previous graphics settings are saved and restored afterwards.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(logFull)

par(oldpar)
# Re-create the new case used for prediction (same values as in cycle 1).
.NewData <- data.frame(
  Age = 10, AttGar = 1, Baths = 2.5, BdRms = 3, Central = 0,
  East = 0, Rooms = 8, SqFt = 2, View = 0, West = 1,
  row.names = "1"
)
.NewData # Newdata
## Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1 10 1 2.5 3 0 0 8 2 0 1
# 95% prediction interval for the new case under the log-response full model.
# logFull is fitted to InPrice = log(Price), so predict() returns values on
# the log scale. The interval is also back-transformed with exp() so it can
# be compared with the Price-scale predictions from the other models, in the
# same way the logReduced prediction is reported.
logFullPred <- predict(
  logFull,
  newdata = .NewData,
  interval = "prediction",
  level = 0.95,
  se.fit = FALSE
)
exp(logFullPred) # back-transformed to the units of Price
# The log-scale interval is printed last so that it sits directly above the
# output recorded below.
logFullPred
## fit lwr upr
## 1 5.688628 5.646304 5.730952
# Backward/forward stepwise selection starting from the log-response full
# model, using BIC. The trace recorded below labels the criterion column
# "AIC" even though BIC was requested.
logReduced <- stepwise(
  logFull,
  direction = "backward/forward",
  criterion = "BIC"
)
##
## Direction: backward/forward
## Criterion: BIC
##
## Start: AIC=-365.64
## InPrice ~ Age + AttGar + Baths + BdRms + East + Rooms + SqFt +
## View + West
##
## Df Sum of Sq RSS AIC
## - Age 1 0.00029 0.01554 -368.60
## <none> 0.01525 -365.64
## - BdRms 1 0.00212 0.01737 -363.03
## - East 1 0.01582 0.03108 -333.96
## - AttGar 1 0.01667 0.03192 -332.62
## - Baths 1 0.02119 0.03644 -326.00
## - West 1 0.02485 0.04010 -321.21
## - Rooms 1 0.07323 0.08848 -281.64
## - View 1 0.18533 0.20058 -240.72
## - SqFt 1 0.70616 0.72141 -176.72
##
## Step: AIC=-368.6
## InPrice ~ AttGar + Baths + BdRms + East + Rooms + SqFt + View +
## West
##
## Df Sum of Sq RSS AIC
## <none> 0.01554 -368.60
## - BdRms 1 0.00215 0.01769 -366.04
## + Age 1 0.00029 0.01525 -365.64
## - East 1 0.01602 0.03156 -337.10
## - AttGar 1 0.01686 0.03241 -335.78
## - Baths 1 0.02092 0.03646 -329.88
## - West 1 0.02466 0.04020 -325.00
## - Rooms 1 0.07540 0.09094 -284.18
## - View 1 0.19579 0.21133 -242.02
## - SqFt 1 0.76050 0.77604 -176.98
# Coefficient table and fit statistics for the model returned by the stepwise
# search and stored in `logReduced`.
summary(logReduced)
##
## Call:
## lm(formula = InPrice ~ AttGar + Baths + BdRms + East + Rooms +
## SqFt + View + West, data = Appraise)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.036136 -0.011475 0.002987 0.012879 0.039952
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.480820 0.021257 210.795 < 2e-16 ***
## AttGar 0.044539 0.006678 6.670 4.83e-08 ***
## Baths -0.060977 0.008208 -7.429 4.12e-09 ***
## BdRms 0.014857 0.006239 2.381 0.022 *
## East -0.048380 0.007442 -6.501 8.40e-08 ***
## Rooms 0.073977 0.005245 14.103 < 2e-16 ***
## SqFt 0.312390 0.006974 44.791 < 2e-16 ***
## View 0.220688 0.009711 22.727 < 2e-16 ***
## West 0.053138 0.006588 8.065 5.39e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01947 on 41 degrees of freedom
## Multiple R-squared: 0.9955, Adjusted R-squared: 0.9946
## F-statistic: 1139 on 8 and 41 DF, p-value: < 2.2e-16
# Diagnostic plots for the log-response reduced model in a 2 x 2 grid; the
# previous graphics settings are saved and restored afterwards.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(logReduced)

par(oldpar)
# Show the new case again before predicting with the reduced model.
.NewData # Newdata
## Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1 10 1 2.5 3 0 0 8 2 0 1
# 95% prediction interval for the new case under the log-response reduced
# model. predict() works on the log scale, so the fit and both limits are
# passed through exp() before being printed.
logPred <- predict(
  logReduced,
  newdata = .NewData,
  interval = "prediction",
  level = 0.95,
  se.fit = FALSE
)
exp(logPred)
## fit lwr upr
## 1 295.0727 282.9233 307.7438