# Widen console output to 100 characters so wide summary tables are wrapped less.
options(width = 100)

# Multiple-regression analysis of the Appraise10 data set (response: Price)
library(abind, pos=26)
library(e1071, pos=27)
library(car)
# library(Rcmdr)
library(RcmdrMisc)

# IMPORTANT
#   Path in following line should point to a local copy of the data file on
#   your computer
###### Cycle 1: full model on Price
# Load the appraisal data into `Appraise`.
#
# Edit `dataDir` so it points at the folder holding your local copy of
# Appraise10.csv. The file is read through an explicit path rather than via
# setwd(), so the session's working directory is left untouched, and a missing
# file is reported with the full path that was tried.
dataDir <- "C:/Users/Noura/Desktop/regresstions"
dataFile <- file.path(dataDir, "Appraise10.csv")
if (!file.exists(dataFile)) {
  stop("Data file not found: ", dataFile, call. = FALSE)
}
Appraise <- read.table(dataFile, header = TRUE, sep = ",", na.strings = "NA",
                       dec = ".", strip.white = TRUE)

# Mean, standard deviation, five-number quantiles and skewness for the
# response (Price) and every candidate predictor.
numSummary(
  Appraise[, c("Price", "Age", "AttGar", "Baths", "BdRms", "Central",
               "East", "Rooms", "SqFt", "View", "West")],
  statistics = c("mean", "sd", "quantiles", "skewness"),
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  type = "2"
)
##             mean         sd    skewness    0%     25%     50%    75%  100%  n
## Price   276.7200 78.1093007  1.19116525 162.0 226.000 262.000 312.25 559.0 50
## Age      11.4000  4.9321933  0.54153012   2.0   8.000  11.000  14.50  25.0 50
## AttGar    0.5200  0.5046720 -0.08256187   0.0   0.000   1.000   1.00   1.0 50
## Baths     1.8200  0.5225526 -0.22097460   1.0   2.000   2.000   2.00   3.0 50
## BdRms     2.9200  0.6006799  0.61487641   2.0   3.000   3.000   3.00   5.0 50
## Central   0.4800  0.5046720  0.08256187   0.0   0.000   0.000   1.00   1.0 50
## East      0.2200  0.4184520  1.39402701   0.0   0.000   0.000   0.00   1.0 50
## Rooms     7.1800  1.0631106  1.74810852   5.0   7.000   7.000   7.75  12.0 50
## SqFt      1.8964  0.5806005  0.02711705   0.8   1.525   1.945   2.24   3.3 50
## View      0.1000  0.3030458  2.74985970   0.0   0.000   0.000   0.00   1.0 50
## West      0.3000  0.4629100  0.90010287   0.0   0.000   0.000   1.00   1.0 50
# Pairwise correlation matrix of Price and the predictors.
cor(
  Appraise[, c("Price", "Age", "AttGar", "Baths", "BdRms", "Central",
               "East", "Rooms", "SqFt", "View", "West")],
  use = "complete"
)
##              Price         Age      AttGar       Baths        BdRms      Central         East
## Price    1.0000000  0.44172728  0.53753458  0.50374133  0.530174739  0.144297913 -0.324008181
## Age      0.4417273  1.00000000  0.22628911  0.28981102  0.334778078 -0.037714852 -0.053396401
## AttGar   0.5375346  0.22628911  1.00000000  0.20739556  0.274670324  0.201923077 -0.166217793
## Baths    0.5037413  0.28981102  0.20739556  1.00000000  0.343292832  0.024763648 -0.095198142
## BdRms    0.5301747  0.33477808  0.27467032  0.34329283  1.000000000 -0.005385693 -0.009743085
## Central  0.1442979 -0.03771485  0.20192308  0.02476365 -0.005385693  1.000000000 -0.510249968
## East    -0.3240082 -0.05339640 -0.16621779 -0.09519814 -0.009743085 -0.510249968  1.000000000
## Rooms    0.7944452  0.47250247  0.43058873  0.72076630  0.630215825  0.025865754 -0.136708697
## SqFt     0.8906853  0.47251220  0.48292092  0.55209353  0.437451041  0.128600610 -0.208354367
## View     0.3840106 -0.10923092  0.18681618 -0.01288741 -0.067267279  0.213504205 -0.177028335
## West     0.1355744  0.08938553 -0.06988566  0.05905754  0.014678924 -0.628970902 -0.347676748
##               Rooms        SqFt        View        West
## Price    0.79444522  0.89068534  0.38401056  0.13557436
## Age      0.47250247  0.47251220 -0.10923092  0.08938553
## AttGar   0.43058873  0.48292092  0.18681618 -0.06988566
## Baths    0.72076630  0.55209353 -0.01288741  0.05905754
## BdRms    0.63021583  0.43745104 -0.06726728  0.01467892
## Central  0.02586575  0.12860061  0.21350421 -0.62897090
## East    -0.13670870 -0.20835437 -0.17702833 -0.34767675
## Rooms    1.00000000  0.66796087  0.06968029  0.09537987
## SqFt     0.66796087  1.00000000  0.14011519  0.04814145
## View     0.06968029  0.14011519  1.00000000 -0.07273930
## West     0.09537987  0.04814145 -0.07273930  1.00000000
# Scatterplot matrix of Price against selected predictors, with density
# estimates on the diagonal and no fitted lines, smoothers or ellipses.
# NOTE(review): these argument names (reg.line, spread, span, id.n, character
# `diagonal`) look like the car 2.x interface -- confirm against the installed
# car version, since later releases renamed several of them.
scatterplotMatrix(
  ~ Price + Age + Baths + BdRms + Rooms + SqFt + View + West,
  data = Appraise,
  reg.line = FALSE,
  smooth = FALSE,
  spread = FALSE,
  span = 0.5,
  ellipse = FALSE,
  levels = c(0.5, 0.9),
  id.n = 0,
  diagonal = 'density'
)


# Full linear model: Price on every predictor except Central, then print
# its coefficient table and fit statistics.
Full <- lm(
  formula = Price ~ Age + AttGar + Baths + BdRms + East + Rooms + SqFt +
    View + West,
  data = Appraise
)
summary(object = Full)
## 
## Call:
## lm(formula = Price ~ Age + AttGar + Baths + BdRms + East + Rooms + 
##     SqFt + View + West, data = Appraise)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.211  -7.620  -2.309   3.620  37.911 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -91.0526    15.5667  -5.849 7.70e-07 ***
## Age          -0.1321     0.4948  -0.267  0.79079    
## AttGar        3.1798     4.8514   0.655  0.51594    
## Baths       -26.7888     6.0270  -4.445 6.81e-05 ***
## BdRms         8.1108     4.5297   1.791  0.08093 .  
## East        -15.4459     5.4071  -2.857  0.00676 ** 
## Rooms        32.2002     3.9236   8.207 4.16e-10 ***
## SqFt         81.7107     5.2852  15.460  < 2e-16 ***
## View         65.8995     7.1897   9.166 2.24e-11 ***
## West         11.1722     4.7879   2.333  0.02474 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.13 on 40 degrees of freedom
## Multiple R-squared:  0.9733, Adjusted R-squared:  0.9673 
## F-statistic: 161.8 on 9 and 40 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots for Full in a 2 x 2 grid, reserving three lines of
# outer margin at the top; the previous graphics settings are restored after.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(Full)

par(oldpar)
# Single-row data frame describing the house whose price is to be predicted.
.NewData <- data.frame(
  Age = 10,
  AttGar = 1,
  Baths = 2.5,
  BdRms = 3,
  Central = 0,
  East = 0,
  Rooms = 8,
  SqFt = 2,
  View = 0,
  West = 1,
  row.names = "1"
)
.NewData  # Newdata
##   Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1  10      1   2.5     3       0    0     8    2    0    1
# Point prediction and 95% prediction interval for the new house (full model).
predict(
  object = Full,
  newdata = .NewData,
  se.fit = FALSE,
  interval = "prediction",
  level = 0.95
)
##        fit      lwr      upr
## 1 300.3619 269.7245 330.9994
###### Cycle 2: stepwise (BIC) reduction of the full model
# Backward/forward stepwise selection starting from Full, using BIC.
Reduced <- stepwise(
  Full,
  criterion = "BIC",
  direction = "backward/forward"
)
## 
## Direction:  backward/forward
## Criterion:  BIC 
## 
## Start:  AIC=292.82
## Price ~ Age + AttGar + Baths + BdRms + East + Rooms + SqFt + 
##     View + West
## 
##          Df Sum of Sq   RSS    AIC
## - Age     1        14  8005 289.00
## - AttGar  1        86  8077 289.45
## - BdRms   1       641  8632 292.77
## <none>                 7991 292.82
## - West    1      1088  9079 295.29
## - East    1      1630  9621 298.19
## - Baths   1      3947 11938 308.98
## - Rooms   1     13455 21446 338.27
## - View    1     16784 24775 345.49
## - SqFt    1     47752 55743 386.03
## 
## Step:  AIC=289
## Price ~ AttGar + Baths + BdRms + East + Rooms + SqFt + View + 
##     West
## 
##          Df Sum of Sq   RSS    AIC
## - AttGar  1        89  8094 285.64
## - BdRms   1       644  8649 288.96
## <none>                 8005 289.00
## - West    1      1079  9084 291.41
## + Age     1        14  7991 292.82
## - East    1      1645  9650 294.43
## - Baths   1      3965 11971 305.21
## - Rooms   1     14062 22068 335.79
## - View    1     17659 25664 343.34
## - SqFt    1     51517 59522 385.40
## 
## Step:  AIC=285.64
## Price ~ Baths + BdRms + East + Rooms + SqFt + View + West
## 
##          Df Sum of Sq   RSS    AIC
## - BdRms   1       629  8723 285.47
## <none>                 8094 285.64
## - West    1      1010  9104 287.61
## + AttGar  1        89  8005 289.00
## + Age     1        17  8077 289.45
## - East    1      1746  9840 291.50
## - Baths   1      4411 12505 303.48
## - Rooms   1     15555 23649 335.34
## - View    1     18015 26109 340.28
## - SqFt    1     57674 65768 386.48
## 
## Step:  AIC=285.47
## Price ~ Baths + East + Rooms + SqFt + View + West
## 
##          Df Sum of Sq   RSS    AIC
## <none>                 8723 285.47
## + BdRms   1       629  8094 285.64
## - West    1       936  9659 286.65
## + AttGar  1        74  8649 288.96
## + Age     1        20  8702 289.26
## - East    1      1618 10341 290.06
## - Baths   1      5535 14258 306.12
## - View    1     17393 26115 336.38
## - Rooms   1     26876 35599 351.87
## - SqFt    1     59308 68031 384.26
# Coefficient table and fit statistics for the stepwise-selected model.
summary(object = Reduced)
## 
## Call:
## lm(formula = Price ~ Baths + East + Rooms + SqFt + View + West, 
##     data = Appraise)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.109  -7.486  -0.972   2.737  36.784 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -91.907     15.001  -6.127 2.39e-07 ***
## Baths        -29.810      5.707  -5.224 4.85e-06 ***
## East         -15.256      5.402  -2.824  0.00716 ** 
## Rooms         36.093      3.136  11.510 1.02e-14 ***
## SqFt          83.087      4.859  17.099  < 2e-16 ***
## View          64.595      6.976   9.260 8.45e-12 ***
## West          10.222      4.758   2.148  0.03735 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.24 on 43 degrees of freedom
## Multiple R-squared:  0.9708, Adjusted R-squared:  0.9668 
## F-statistic: 238.5 on 6 and 43 DF,  p-value: < 2.2e-16
# Diagnostic plots for Reduced in a 2 x 2 grid (three-line top outer margin),
# restoring the earlier graphics settings afterwards.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(Reduced)

par(oldpar)
# Echo the new observation used for prediction.
.NewData  # Newdata
##   Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1  10      1   2.5     3       0    0     8    2    0    1
# Point prediction and 95% prediction interval for the new house (reduced model).
predict(
  object = Reduced,
  newdata = .NewData,
  se.fit = FALSE,
  interval = "prediction",
  level = 0.95
)
##        fit     lwr      upr
## 1 298.7051 268.494 328.9161
###### Cycle 3: models on log(Price)
# Add the natural log of Price as a new column, InPrice.
Appraise[["InPrice"]] <- log(Appraise[["Price"]])

# Same descriptive statistics as before, with InPrice (log of Price) as the
# response column.
numSummary(
  Appraise[, c("InPrice", "Age", "AttGar", "Baths", "BdRms", "Central",
               "East", "Rooms", "SqFt", "View", "West")],
  statistics = c("mean", "sd", "quantiles", "skewness"),
  quantiles = c(0, 0.25, 0.5, 0.75, 1),
  type = "2"
)
##              mean        sd    skewness       0%      25%       50%       75%      100%  n
## InPrice  5.587323 0.2661106  0.33625981 5.087596 5.420535  5.568337  5.743757  6.326149 50
## Age     11.400000 4.9321933  0.54153012 2.000000 8.000000 11.000000 14.500000 25.000000 50
## AttGar   0.520000 0.5046720 -0.08256187 0.000000 0.000000  1.000000  1.000000  1.000000 50
## Baths    1.820000 0.5225526 -0.22097460 1.000000 2.000000  2.000000  2.000000  3.000000 50
## BdRms    2.920000 0.6006799  0.61487641 2.000000 3.000000  3.000000  3.000000  5.000000 50
## Central  0.480000 0.5046720  0.08256187 0.000000 0.000000  0.000000  1.000000  1.000000 50
## East     0.220000 0.4184520  1.39402701 0.000000 0.000000  0.000000  0.000000  1.000000 50
## Rooms    7.180000 1.0631106  1.74810852 5.000000 7.000000  7.000000  7.750000 12.000000 50
## SqFt     1.896400 0.5806005  0.02711705 0.800000 1.525000  1.945000  2.240000  3.300000 50
## View     0.100000 0.3030458  2.74985970 0.000000 0.000000  0.000000  0.000000  1.000000 50
## West     0.300000 0.4629100  0.90010287 0.000000 0.000000  0.000000  1.000000  1.000000 50
# Pairwise correlation matrix of InPrice and the predictors.
cor(
  Appraise[, c("InPrice", "Age", "AttGar", "Baths", "BdRms", "Central",
               "East", "Rooms", "SqFt", "View", "West")],
  use = "complete"
)
##            InPrice         Age      AttGar       Baths        BdRms      Central         East
## InPrice  1.0000000  0.43462221  0.57838257  0.50806017  0.485231606  0.143541330 -0.338083936
## Age      0.4346222  1.00000000  0.22628911  0.28981102  0.334778078 -0.037714852 -0.053396401
## AttGar   0.5783826  0.22628911  1.00000000  0.20739556  0.274670324  0.201923077 -0.166217793
## Baths    0.5080602  0.28981102  0.20739556  1.00000000  0.343292832  0.024763648 -0.095198142
## BdRms    0.4852316  0.33477808  0.27467032  0.34329283  1.000000000 -0.005385693 -0.009743085
## Central  0.1435413 -0.03771485  0.20192308  0.02476365 -0.005385693  1.000000000 -0.510249968
## East    -0.3380839 -0.05339640 -0.16621779 -0.09519814 -0.009743085 -0.510249968  1.000000000
## Rooms    0.7587326  0.47250247  0.43058873  0.72076630  0.630215825  0.025865754 -0.136708697
## SqFt     0.9238494  0.47251220  0.48292092  0.55209353  0.437451041  0.128600610 -0.208354367
## View     0.3892219 -0.10923092  0.18681618 -0.01288741 -0.067267279  0.213504205 -0.177028335
## West     0.1491231  0.08938553 -0.06988566  0.05905754  0.014678924 -0.628970902 -0.347676748
##               Rooms        SqFt        View        West
## InPrice  0.75873257  0.92384943  0.38922188  0.14912311
## Age      0.47250247  0.47251220 -0.10923092  0.08938553
## AttGar   0.43058873  0.48292092  0.18681618 -0.06988566
## Baths    0.72076630  0.55209353 -0.01288741  0.05905754
## BdRms    0.63021583  0.43745104 -0.06726728  0.01467892
## Central  0.02586575  0.12860061  0.21350421 -0.62897090
## East    -0.13670870 -0.20835437 -0.17702833 -0.34767675
## Rooms    1.00000000  0.66796087  0.06968029  0.09537987
## SqFt     0.66796087  1.00000000  0.14011519  0.04814145
## View     0.06968029  0.14011519  1.00000000 -0.07273930
## West     0.09537987  0.04814145 -0.07273930  1.00000000
# Scatterplot matrix of InPrice against the listed predictors, with density
# estimates on the diagonal and no fitted lines, smoothers or ellipses.
# NOTE(review): these argument names (reg.line, spread, span, id.n, character
# `diagonal`) look like the car 2.x interface -- confirm against the installed
# car version, since later releases renamed several of them.
scatterplotMatrix(
  ~ InPrice + Baths + East + Rooms + SqFt + View + West,
  data = Appraise,
  reg.line = FALSE,
  smooth = FALSE,
  spread = FALSE,
  span = 0.5,
  ellipse = FALSE,
  levels = c(0.5, 0.9),
  id.n = 0,
  diagonal = 'density'
)


# Full linear model on the log scale: InPrice on every predictor except
# Central, followed by its coefficient table and fit statistics.
logFull <- lm(
  formula = InPrice ~ Age + AttGar + Baths + BdRms + East + Rooms + SqFt +
    View + West,
  data = Appraise
)
summary(object = logFull)
## 
## Call:
## lm(formula = InPrice ~ Age + AttGar + Baths + BdRms + East + 
##     Rooms + SqFt + View + West, data = Appraise)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.036027 -0.009418  0.002833  0.011185  0.041543 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.4783498  0.0215045 208.252  < 2e-16 ***
## Age         -0.0005976  0.0006835  -0.874   0.3871    
## AttGar       0.0443142  0.0067020   6.612 6.53e-08 ***
## Baths       -0.0620683  0.0083260  -7.455 4.40e-09 ***
## BdRms        0.0147703  0.0062576   2.360   0.0232 *  
## East        -0.0481231  0.0074696  -6.443 1.13e-07 ***
## Rooms        0.0751194  0.0054203  13.859  < 2e-16 ***
## SqFt         0.3142207  0.0073012  43.037  < 2e-16 ***
## View         0.2189815  0.0099322  22.048  < 2e-16 ***
## West         0.0534033  0.0066143   8.074 6.28e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01953 on 40 degrees of freedom
## Multiple R-squared:  0.9956, Adjusted R-squared:  0.9946 
## F-statistic:  1007 on 9 and 40 DF,  p-value: < 2.2e-16
# Diagnostic plots for logFull in a 2 x 2 grid (three-line top outer margin),
# restoring the earlier graphics settings afterwards.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(logFull)

par(oldpar)
# (Re)build the single-row data frame describing the house to be predicted.
.NewData <- data.frame(
  Age = 10,
  AttGar = 1,
  Baths = 2.5,
  BdRms = 3,
  Central = 0,
  East = 0,
  Rooms = 8,
  SqFt = 2,
  View = 0,
  West = 1,
  row.names = "1"
)
.NewData  # Newdata
##   Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1  10      1   2.5     3       0    0     8    2    0    1
# 95% prediction interval for the new house from the full log-scale model.
# logFull is fitted to InPrice = log(Price), so predict() returns values on the
# log scale; they are printed first and then back-transformed with exp() so the
# interval can be compared with the Price-scale predictions from the other
# models (the same treatment the script gives logReduced).
logFullPred <- predict(logFull, newdata = .NewData, interval = "prediction",
                       level = 0.95, se.fit = FALSE)
logFullPred  # log(Price) scale
##        fit      lwr      upr
## 1 5.688628 5.646304 5.730952
exp(logFullPred)  # back-transformed to the Price scale
# Backward/forward stepwise selection starting from logFull, using BIC.
logReduced <- stepwise(
  logFull,
  criterion = "BIC",
  direction = "backward/forward"
)
## 
## Direction:  backward/forward
## Criterion:  BIC 
## 
## Start:  AIC=-365.64
## InPrice ~ Age + AttGar + Baths + BdRms + East + Rooms + SqFt + 
##     View + West
## 
##          Df Sum of Sq     RSS     AIC
## - Age     1   0.00029 0.01554 -368.60
## <none>                0.01525 -365.64
## - BdRms   1   0.00212 0.01737 -363.03
## - East    1   0.01582 0.03108 -333.96
## - AttGar  1   0.01667 0.03192 -332.62
## - Baths   1   0.02119 0.03644 -326.00
## - West    1   0.02485 0.04010 -321.21
## - Rooms   1   0.07323 0.08848 -281.64
## - View    1   0.18533 0.20058 -240.72
## - SqFt    1   0.70616 0.72141 -176.72
## 
## Step:  AIC=-368.6
## InPrice ~ AttGar + Baths + BdRms + East + Rooms + SqFt + View + 
##     West
## 
##          Df Sum of Sq     RSS     AIC
## <none>                0.01554 -368.60
## - BdRms   1   0.00215 0.01769 -366.04
## + Age     1   0.00029 0.01525 -365.64
## - East    1   0.01602 0.03156 -337.10
## - AttGar  1   0.01686 0.03241 -335.78
## - Baths   1   0.02092 0.03646 -329.88
## - West    1   0.02466 0.04020 -325.00
## - Rooms   1   0.07540 0.09094 -284.18
## - View    1   0.19579 0.21133 -242.02
## - SqFt    1   0.76050 0.77604 -176.98
# Coefficient table and fit statistics for the stepwise-selected log model.
summary(object = logReduced)
## 
## Call:
## lm(formula = InPrice ~ AttGar + Baths + BdRms + East + Rooms + 
##     SqFt + View + West, data = Appraise)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.036136 -0.011475  0.002987  0.012879  0.039952 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.480820   0.021257 210.795  < 2e-16 ***
## AttGar       0.044539   0.006678   6.670 4.83e-08 ***
## Baths       -0.060977   0.008208  -7.429 4.12e-09 ***
## BdRms        0.014857   0.006239   2.381    0.022 *  
## East        -0.048380   0.007442  -6.501 8.40e-08 ***
## Rooms        0.073977   0.005245  14.103  < 2e-16 ***
## SqFt         0.312390   0.006974  44.791  < 2e-16 ***
## View         0.220688   0.009711  22.727  < 2e-16 ***
## West         0.053138   0.006588   8.065 5.39e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01947 on 41 degrees of freedom
## Multiple R-squared:  0.9955, Adjusted R-squared:  0.9946 
## F-statistic:  1139 on 8 and 41 DF,  p-value: < 2.2e-16
# Diagnostic plots for logReduced in a 2 x 2 grid (three-line top outer
# margin), restoring the earlier graphics settings afterwards.
oldpar <- par(
  oma = c(0, 0, 3, 0),
  mfrow = c(2, 2)
)
plot(logReduced)

par(oldpar)
# Echo the new observation used for prediction.
.NewData  # Newdata
##   Age AttGar Baths BdRms Central East Rooms SqFt View West
## 1  10      1   2.5     3       0    0     8    2    0    1
# 95% prediction interval from the reduced log-scale model, computed on the
# log(Price) scale and then back-transformed with exp() to the Price scale.
logPred <- predict(
  object = logReduced,
  newdata = .NewData,
  se.fit = FALSE,
  interval = "prediction",
  level = 0.95
)
exp(logPred)
##        fit      lwr      upr
## 1 295.0727 282.9233 307.7438