Row

Problem 1

Using R, generate a random variable X that has 10,000 random uniform numbers from 1 to N, where N can be any number of your choosing greater than or equal to 6. Then generate a random variable Y that has 10,000 random normal numbers with a mean of \(\mu =\sigma =(N+1)/2\)

# Fix the RNG state so the simulated samples are reproducible.
set.seed(223)

# Upper bound N of the uniform distribution.
MAX <- 8

# X: 10,000 draws from Uniform(1, MAX); x is the sample median of X.
X <- runif(n = 10000, min = 1, max = MAX)
x <- median(X)

# Y: 10,000 draws from a normal distribution whose mean and standard
# deviation are both (MAX + 1) / 2; y is the first quartile of Y.
mu.sigma <- (MAX + 1) / 2
Y <- rnorm(n = 10000, mean = mu.sigma, sd = mu.sigma)
y <- quantile(Y, probs = 0.25)

# median value for x
x
## [1] 4.506875
# first quartile for y
y
##      25% 
## 1.412316
# Pair the two samples row by row in a data frame with columns X and Y.
all.data <- data.frame(X = X, Y = Y)
head(all.data)
##          X         Y
## 1 6.770470  7.509567
## 2 3.284944 10.529317
## 3 2.057999 10.055423
## 4 4.522908  6.825093
## 5 4.223694 -2.546282
## 6 7.249178  7.524893
summary(all.data)
##        X               Y          
##  Min.   :1.001   Min.   :-13.978  
##  1st Qu.:2.767   1st Qu.:  1.412  
##  Median :4.507   Median :  4.487  
##  Mean   :4.497   Mean   :  4.480  
##  3rd Qu.:6.236   3rd Qu.:  7.535  
##  Max.   :7.998   Max.   : 22.674
hist(X)

hist(Y)

Probability. Calculate as a minimum the below probabilities a through c. Assume the small letter “x” is estimated as the median of the X variable, and the small letter “y” is estimated as the 1st quartile of the Y variable. Interpret the meaning of all probabilities. 5 points a. P(X>x | X>y) b. P(X>x, Y>y) c. P(X<x | X>y)

Because x is the median of X, P(X > x) should be about 50%; the histogram above also shows that X is approximately uniformly distributed.

  1. P(X>x | X>y)

P(A|B) = P(A and B)/P(B), so P(X>x | X>y) = P(X>x and X>y)/P(X>y)

First quartile of Y (y): 1.412316; median of X (x): 4.506875

# Joint probability P(X > x and X > y): share of all rows whose X exceeds both
# the median of X (x) and the first quartile of Y (y).
X.greater.x.and.X.greater.y <- mean(all.data$X > x & all.data$X > y)
X.greater.x.and.X.greater.y
## [1] 0.5
# Marginal probability P(X > y): share of all rows whose X exceeds y.
X.greater.y <- mean(all.data$X > y)
X.greater.y
## [1] 0.9416
# Conditional probability P(X > x | X > y) = P(X > x and X > y) / P(X > y):
# the chance that X is above its median (about 4.51) given that X is above the
# first quartile of Y (about 1.41).

X.greater.x.and.X.greater.y / X.greater.y
## [1] 0.531011
  1. P(X>x, Y>y)
# Joint probability P(X > x, Y > y): fraction of rows where X is above its
# median and Y is above its first quartile.
mean(all.data$X > x & all.data$Y > y)
## [1] 0.3762
  1. P(X<x | X>y)

P(X is less than 4.51 given that X is greater than 1.41)

# P(X < x | X > y): restrict the sample to the rows where X > y, then measure
# the share of those rows whose X is also below the median x.
data.X.greater.Y <- subset(all.data, all.data$X > y)
nrow(data.X.greater.Y)
## [1] 9416
# number of items where X > y
nrow(subset(data.X.greater.Y, data.X.greater.Y$X < x))
## [1] 4416
# of these X greater than y, how many are less than the median, x

# A conditional probability is normalised by the size of the conditioning
# event (the rows with X > y), not by the full sample. Dividing by
# nrow(all.data) would give the joint probability P(X < x and X > y) = 0.4416.
nrow(subset(data.X.greater.Y, X < x)) / nrow(data.X.greater.Y)
## [1] 0.468989

5 points. Investigate whether P(X>x and Y>y)=P(X>x)P(Y>y) by building a table and evaluating the marginal and joint probabilities.

# Marginal subsets of the sample, split at the median of X (x) and at the
# first quartile of Y (y).
X.greater.x <- subset(all.data, X > x)
Y.greater.y <- subset(all.data, Y > y)
X.less.x <- subset(all.data, X < x)
Y.less.y <- subset(all.data, Y < y)

# Joint counts: each cell is the number of rows satisfying BOTH the column
# condition on X and the row condition on Y. (Stacking the X marginals and the
# Y marginals as rows does not give a contingency table.)
joint.counts <- matrix(
  c(
    sum(all.data$X > x & all.data$Y < y), sum(all.data$X < x & all.data$Y < y),
    sum(all.data$X > x & all.data$Y > y), sum(all.data$X < x & all.data$Y > y)
  ),
  nrow = 2, ncol = 2, byrow = TRUE
)

# Append the marginal totals as a third column and a third row.
df <- cbind(joint.counts, rowSums(joint.counts))
df <- rbind(df, colSums(df))

colnames(df) <- c("X>x", "X<x", "total")
row.names(df) <- c("Y<y", "Y>y", "total")

# Divide by the grand total (the sample size) to turn counts into joint and
# marginal probabilities.
df / df["total", "total"]
##          X>x    X<x total
## Y<y   0.1238 0.1262  0.25
## Y>y   0.3762 0.3738  0.75
## total 0.5000 0.5000  1.00
# Joint probability P(X > x and Y > y), estimated as a sample proportion.
P.Xgreaterx.and.Ygreatery <- mean(all.data$X > x & all.data$Y > y)
# Marginal probability P(X > x).
P.Xgreater.x <- mean(all.data$X > x)
# Marginal probability P(Y > y).
P.Ygreater.y <- mean(all.data$Y > y)

# Under independence the joint probability equals the product of marginals.
product.of.marginals <- P.Xgreater.x * P.Ygreater.y
round(product.of.marginals, 3)
## [1] 0.375
round(P.Xgreaterx.and.Ygreatery, 3)
## [1] 0.376
# The two values differ only in the third decimal place; rounded to two
# decimals they coincide, i.e. P(X>x and Y>y) = P(X>x)P(Y>y) approximately.
round(product.of.marginals, 2)
## [1] 0.38
round(P.Xgreaterx.and.Ygreatery, 2)
## [1] 0.38
round(product.of.marginals, 2) == round(P.Xgreaterx.and.Ygreatery, 2)
## [1] TRUE

5 points. Check to see if independence holds by using Fisher’s Exact Test and the Chi Square Test. What is the difference between the two? Which is most appropriate?

The chi-square test is used when the sample is larger. If I try to use the Fisher test, I get the following error:

Error in fisher.test(df) : FEXACT error 501. The hash table key cannot be computed because the largest key is larger than the largest representable int. The algorithm cannot proceed. Reduce the workspace, consider using ‘simulate.p.value=TRUE’ or another algorithm.

Therefore, the chi-square test makes sense in this scenario due to the table size.

# Test independence of the events {X > x} and {Y > y} on the 2 x 2 table of
# joint counts. Marginal totals must not be passed to either test: a matrix
# that carries a "total" row and column is treated as a 3 x 3 table, which
# gives 4 degrees of freedom and a meaningless, tiny p-value.
independence.counts <- table(
  all.data$X > x,
  all.data$Y > y,
  dnn = c("X>x", "Y>y")
)

# Pearson chi-square test (continuity-corrected by default for 2 x 2 tables).
chisq.test(independence.counts)
## NOTE(review): echoed output not regenerated; re-knit to refresh it.
## Hand calculation from the joint counts 3762 / 1238 / 3738 / 1262 gives
## X-squared of about 0.28 on df = 1 (p of about 0.6), i.e. no evidence
## against independence.

# Fisher's exact test on the same 2 x 2 table; simulate.p.value is only
# relevant for tables larger than 2 x 2, so it is not passed here.
fisher.test(independence.counts)
## NOTE(review): echoed output not regenerated; re-knit to refresh it.

Problem 2

Data Load

# Data Load
train_file = "https://raw.githubusercontent.com/dapolloxp/R-Projects/master/train.csv"
test_file = "https://raw.githubusercontent.com/dapolloxp/R-Projects/master/test.csv"
#' Download a CSV file from a URL and parse it into a data frame
#'
#' Relies on `url.exists()` and `getURL()` being in scope (presumably from
#' the RCurl package attached elsewhere in the document; confirm).
#'
#' @param url Character string with the address of the CSV file.
#' @return The data frame produced by `read.csv()` on the downloaded text.
#' @details Signals an error when the URL does not exist or when the
#'   download or the parsing fails.
pullCSVFile <- function(url) {
  # `break` is only legal inside a loop, so a missing file is reported with
  # stop() rather than break().
  if (!url.exists(url)) {
    stop("Could not locate CSV file at: ", url, call. = FALSE)
  }

  message(paste("CSV File ", url, " exists"))
  message("Downloading CSV File from github...\nThis may take a while....")

  # The parsed data frame is the value of tryCatch() and therefore of the
  # function. No `finally = return(...)` is used: after a failure the result
  # variable would not exist and the return itself would fail.
  tryCatch(
    {
      out_file <- getURL(url)
      read.csv(text = out_file)
    },
    error = function(err) {
      stop("Couldn't download file: ", conditionMessage(err), call. = FALSE)
    }
  )
}
train_set <- pullCSVFile(train_file)
## CSV File  https://raw.githubusercontent.com/dapolloxp/R-Projects/master/train.csv  exists
## Downloading CSV File from github...
## This may take a while....
test_set <- pullCSVFile(test_file)
## CSV File  https://raw.githubusercontent.com/dapolloxp/R-Projects/master/test.csv  exists
## Downloading CSV File from github...
## This may take a while....
head(train_set)
##   Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape
## 1  1         60       RL          65    8450   Pave  <NA>      Reg
## 2  2         20       RL          80    9600   Pave  <NA>      Reg
## 3  3         60       RL          68   11250   Pave  <NA>      IR1
## 4  4         70       RL          60    9550   Pave  <NA>      IR1
## 5  5         60       RL          84   14260   Pave  <NA>      IR1
## 6  6         50       RL          85   14115   Pave  <NA>      IR1
##   LandContour Utilities LotConfig LandSlope Neighborhood Condition1
## 1         Lvl    AllPub    Inside       Gtl      CollgCr       Norm
## 2         Lvl    AllPub       FR2       Gtl      Veenker      Feedr
## 3         Lvl    AllPub    Inside       Gtl      CollgCr       Norm
## 4         Lvl    AllPub    Corner       Gtl      Crawfor       Norm
## 5         Lvl    AllPub       FR2       Gtl      NoRidge       Norm
## 6         Lvl    AllPub    Inside       Gtl      Mitchel       Norm
##   Condition2 BldgType HouseStyle OverallQual OverallCond YearBuilt
## 1       Norm     1Fam     2Story           7           5      2003
## 2       Norm     1Fam     1Story           6           8      1976
## 3       Norm     1Fam     2Story           7           5      2001
## 4       Norm     1Fam     2Story           7           5      1915
## 5       Norm     1Fam     2Story           8           5      2000
## 6       Norm     1Fam     1.5Fin           5           5      1993
##   YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType
## 1         2003     Gable  CompShg     VinylSd     VinylSd    BrkFace
## 2         1976     Gable  CompShg     MetalSd     MetalSd       None
## 3         2002     Gable  CompShg     VinylSd     VinylSd    BrkFace
## 4         1970     Gable  CompShg     Wd Sdng     Wd Shng       None
## 5         2000     Gable  CompShg     VinylSd     VinylSd    BrkFace
## 6         1995     Gable  CompShg     VinylSd     VinylSd       None
##   MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure
## 1        196        Gd        TA      PConc       Gd       TA           No
## 2          0        TA        TA     CBlock       Gd       TA           Gd
## 3        162        Gd        TA      PConc       Gd       TA           Mn
## 4          0        TA        TA     BrkTil       TA       Gd           No
## 5        350        Gd        TA      PConc       Gd       TA           Av
## 6          0        TA        TA       Wood       Gd       TA           No
##   BsmtFinType1 BsmtFinSF1 BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF
## 1          GLQ        706          Unf          0       150         856
## 2          ALQ        978          Unf          0       284        1262
## 3          GLQ        486          Unf          0       434         920
## 4          ALQ        216          Unf          0       540         756
## 5          GLQ        655          Unf          0       490        1145
## 6          GLQ        732          Unf          0        64         796
##   Heating HeatingQC CentralAir Electrical X1stFlrSF X2ndFlrSF LowQualFinSF
## 1    GasA        Ex          Y      SBrkr       856       854            0
## 2    GasA        Ex          Y      SBrkr      1262         0            0
## 3    GasA        Ex          Y      SBrkr       920       866            0
## 4    GasA        Gd          Y      SBrkr       961       756            0
## 5    GasA        Ex          Y      SBrkr      1145      1053            0
## 6    GasA        Ex          Y      SBrkr       796       566            0
##   GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath BedroomAbvGr
## 1      1710            1            0        2        1            3
## 2      1262            0            1        2        0            3
## 3      1786            1            0        2        1            3
## 4      1717            1            0        1        0            3
## 5      2198            1            0        2        1            4
## 6      1362            1            0        1        1            1
##   KitchenAbvGr KitchenQual TotRmsAbvGrd Functional Fireplaces FireplaceQu
## 1            1          Gd            8        Typ          0        <NA>
## 2            1          TA            6        Typ          1          TA
## 3            1          Gd            6        Typ          1          TA
## 4            1          Gd            7        Typ          1          Gd
## 5            1          Gd            9        Typ          1          TA
## 6            1          TA            5        Typ          0        <NA>
##   GarageType GarageYrBlt GarageFinish GarageCars GarageArea GarageQual
## 1     Attchd        2003          RFn          2        548         TA
## 2     Attchd        1976          RFn          2        460         TA
## 3     Attchd        2001          RFn          2        608         TA
## 4     Detchd        1998          Unf          3        642         TA
## 5     Attchd        2000          RFn          3        836         TA
## 6     Attchd        1993          Unf          2        480         TA
##   GarageCond PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch X3SsnPorch
## 1         TA          Y          0          61             0          0
## 2         TA          Y        298           0             0          0
## 3         TA          Y          0          42             0          0
## 4         TA          Y          0          35           272          0
## 5         TA          Y        192          84             0          0
## 6         TA          Y         40          30             0        320
##   ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold
## 1           0        0   <NA>  <NA>        <NA>       0      2   2008
## 2           0        0   <NA>  <NA>        <NA>       0      5   2007
## 3           0        0   <NA>  <NA>        <NA>       0      9   2008
## 4           0        0   <NA>  <NA>        <NA>       0      2   2006
## 5           0        0   <NA>  <NA>        <NA>       0     12   2008
## 6           0        0   <NA> MnPrv        Shed     700     10   2009
##   SaleType SaleCondition SalePrice
## 1       WD        Normal    208500
## 2       WD        Normal    181500
## 3       WD        Normal    223500
## 4       WD       Abnorml    140000
## 5       WD        Normal    250000
## 6       WD        Normal    143000
head(test_set)
##     Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape
## 1 1461         20       RH          80   11622   Pave  <NA>      Reg
## 2 1462         20       RL          81   14267   Pave  <NA>      IR1
## 3 1463         60       RL          74   13830   Pave  <NA>      IR1
## 4 1464         60       RL          78    9978   Pave  <NA>      IR1
## 5 1465        120       RL          43    5005   Pave  <NA>      IR1
## 6 1466         60       RL          75   10000   Pave  <NA>      IR1
##   LandContour Utilities LotConfig LandSlope Neighborhood Condition1
## 1         Lvl    AllPub    Inside       Gtl        NAmes      Feedr
## 2         Lvl    AllPub    Corner       Gtl        NAmes       Norm
## 3         Lvl    AllPub    Inside       Gtl      Gilbert       Norm
## 4         Lvl    AllPub    Inside       Gtl      Gilbert       Norm
## 5         HLS    AllPub    Inside       Gtl      StoneBr       Norm
## 6         Lvl    AllPub    Corner       Gtl      Gilbert       Norm
##   Condition2 BldgType HouseStyle OverallQual OverallCond YearBuilt
## 1       Norm     1Fam     1Story           5           6      1961
## 2       Norm     1Fam     1Story           6           6      1958
## 3       Norm     1Fam     2Story           5           5      1997
## 4       Norm     1Fam     2Story           6           6      1998
## 5       Norm   TwnhsE     1Story           8           5      1992
## 6       Norm     1Fam     2Story           6           5      1993
##   YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType
## 1         1961     Gable  CompShg     VinylSd     VinylSd       None
## 2         1958       Hip  CompShg     Wd Sdng     Wd Sdng    BrkFace
## 3         1998     Gable  CompShg     VinylSd     VinylSd       None
## 4         1998     Gable  CompShg     VinylSd     VinylSd    BrkFace
## 5         1992     Gable  CompShg     HdBoard     HdBoard       None
## 6         1994     Gable  CompShg     HdBoard     HdBoard       None
##   MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure
## 1          0        TA        TA     CBlock       TA       TA           No
## 2        108        TA        TA     CBlock       TA       TA           No
## 3          0        TA        TA      PConc       Gd       TA           No
## 4         20        TA        TA      PConc       TA       TA           No
## 5          0        Gd        TA      PConc       Gd       TA           No
## 6          0        TA        TA      PConc       Gd       TA           No
##   BsmtFinType1 BsmtFinSF1 BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF
## 1          Rec        468          LwQ        144       270         882
## 2          ALQ        923          Unf          0       406        1329
## 3          GLQ        791          Unf          0       137         928
## 4          GLQ        602          Unf          0       324         926
## 5          ALQ        263          Unf          0      1017        1280
## 6          Unf          0          Unf          0       763         763
##   Heating HeatingQC CentralAir Electrical X1stFlrSF X2ndFlrSF LowQualFinSF
## 1    GasA        TA          Y      SBrkr       896         0            0
## 2    GasA        TA          Y      SBrkr      1329         0            0
## 3    GasA        Gd          Y      SBrkr       928       701            0
## 4    GasA        Ex          Y      SBrkr       926       678            0
## 5    GasA        Ex          Y      SBrkr      1280         0            0
## 6    GasA        Gd          Y      SBrkr       763       892            0
##   GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath BedroomAbvGr
## 1       896            0            0        1        0            2
## 2      1329            0            0        1        1            3
## 3      1629            0            0        2        1            3
## 4      1604            0            0        2        1            3
## 5      1280            0            0        2        0            2
## 6      1655            0            0        2        1            3
##   KitchenAbvGr KitchenQual TotRmsAbvGrd Functional Fireplaces FireplaceQu
## 1            1          TA            5        Typ          0        <NA>
## 2            1          Gd            6        Typ          0        <NA>
## 3            1          TA            6        Typ          1          TA
## 4            1          Gd            7        Typ          1          Gd
## 5            1          Gd            5        Typ          0        <NA>
## 6            1          TA            7        Typ          1          TA
##   GarageType GarageYrBlt GarageFinish GarageCars GarageArea GarageQual
## 1     Attchd        1961          Unf          1        730         TA
## 2     Attchd        1958          Unf          1        312         TA
## 3     Attchd        1997          Fin          2        482         TA
## 4     Attchd        1998          Fin          2        470         TA
## 5     Attchd        1992          RFn          2        506         TA
## 6     Attchd        1993          Fin          2        440         TA
##   GarageCond PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch X3SsnPorch
## 1         TA          Y        140           0             0          0
## 2         TA          Y        393          36             0          0
## 3         TA          Y        212          34             0          0
## 4         TA          Y        360          36             0          0
## 5         TA          Y          0          82             0          0
## 6         TA          Y        157          84             0          0
##   ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold
## 1         120        0   <NA> MnPrv        <NA>       0      6   2010
## 2           0        0   <NA>  <NA>        Gar2   12500      6   2010
## 3           0        0   <NA> MnPrv        <NA>       0      3   2010
## 4           0        0   <NA>  <NA>        <NA>       0      6   2010
## 5         144        0   <NA>  <NA>        <NA>       0      1   2010
## 6           0        0   <NA>  <NA>        <NA>       0      4   2010
##   SaleType SaleCondition
## 1       WD        Normal
## 2       WD        Normal
## 3       WD        Normal
## 4       WD        Normal
## 5       WD        Normal
## 6       WD        Normal

5 points. Descriptive and Inferential Statistics. Provide univariate descriptive statistics and appropriate plots for the training data set. Provide a scatterplot matrix for at least two of the independent variables and the dependent variable. Derive a correlation matrix for any three quantitative variables in the dataset. Test the hypotheses that the correlations between each pairwise set of variables is 0 and provide an 80% confidence interval. Discuss the meaning of your analysis. Would you be worried about familywise error? Why or why not?

For this problem, I chose the following variables:

  • Total Basement Area
  • Total Garage Area
  • Above-Grade (Ground) Living Area
  • SalePrice

Below is the summary for the entire set of variables in the training set. I have chosen to draw scatter plots for the four variables listed above.

summary(train_set)
##        Id           MSSubClass       MSZoning     LotFrontage    
##  Min.   :   1.0   Min.   : 20.0   C (all):  10   Min.   : 21.00  
##  1st Qu.: 365.8   1st Qu.: 20.0   FV     :  65   1st Qu.: 59.00  
##  Median : 730.5   Median : 50.0   RH     :  16   Median : 69.00  
##  Mean   : 730.5   Mean   : 56.9   RL     :1151   Mean   : 70.05  
##  3rd Qu.:1095.2   3rd Qu.: 70.0   RM     : 218   3rd Qu.: 80.00  
##  Max.   :1460.0   Max.   :190.0                  Max.   :313.00  
##                                                  NA's   :259     
##     LotArea        Street      Alley      LotShape  LandContour
##  Min.   :  1300   Grvl:   6   Grvl:  50   IR1:484   Bnk:  63   
##  1st Qu.:  7554   Pave:1454   Pave:  41   IR2: 41   HLS:  50   
##  Median :  9478               NA's:1369   IR3: 10   Low:  36   
##  Mean   : 10517                           Reg:925   Lvl:1311   
##  3rd Qu.: 11602                                                
##  Max.   :215245                                                
##                                                                
##   Utilities      LotConfig    LandSlope   Neighborhood   Condition1  
##  AllPub:1459   Corner : 263   Gtl:1382   NAmes  :225   Norm   :1260  
##  NoSeWa:   1   CulDSac:  94   Mod:  65   CollgCr:150   Feedr  :  81  
##                FR2    :  47   Sev:  13   OldTown:113   Artery :  48  
##                FR3    :   4              Edwards:100   RRAn   :  26  
##                Inside :1052              Somerst: 86   PosN   :  19  
##                                          Gilbert: 79   RRAe   :  11  
##                                          (Other):707   (Other):  15  
##    Condition2     BldgType      HouseStyle   OverallQual    
##  Norm   :1445   1Fam  :1220   1Story :726   Min.   : 1.000  
##  Feedr  :   6   2fmCon:  31   2Story :445   1st Qu.: 5.000  
##  Artery :   2   Duplex:  52   1.5Fin :154   Median : 6.000  
##  PosN   :   2   Twnhs :  43   SLvl   : 65   Mean   : 6.099  
##  RRNn   :   2   TwnhsE: 114   SFoyer : 37   3rd Qu.: 7.000  
##  PosA   :   1                 1.5Unf : 14   Max.   :10.000  
##  (Other):   2                 (Other): 19                   
##   OverallCond      YearBuilt     YearRemodAdd    RoofStyle   
##  Min.   :1.000   Min.   :1872   Min.   :1950   Flat   :  13  
##  1st Qu.:5.000   1st Qu.:1954   1st Qu.:1967   Gable  :1141  
##  Median :5.000   Median :1973   Median :1994   Gambrel:  11  
##  Mean   :5.575   Mean   :1971   Mean   :1985   Hip    : 286  
##  3rd Qu.:6.000   3rd Qu.:2000   3rd Qu.:2004   Mansard:   7  
##  Max.   :9.000   Max.   :2010   Max.   :2010   Shed   :   2  
##                                                              
##     RoofMatl     Exterior1st   Exterior2nd    MasVnrType    MasVnrArea    
##  CompShg:1434   VinylSd:515   VinylSd:504   BrkCmn : 15   Min.   :   0.0  
##  Tar&Grv:  11   HdBoard:222   MetalSd:214   BrkFace:445   1st Qu.:   0.0  
##  WdShngl:   6   MetalSd:220   HdBoard:207   None   :864   Median :   0.0  
##  WdShake:   5   Wd Sdng:206   Wd Sdng:197   Stone  :128   Mean   : 103.7  
##  ClyTile:   1   Plywood:108   Plywood:142   NA's   :  8   3rd Qu.: 166.0  
##  Membran:   1   CemntBd: 61   CmentBd: 60                 Max.   :1600.0  
##  (Other):   2   (Other):128   (Other):136                 NA's   :8       
##  ExterQual ExterCond  Foundation  BsmtQual   BsmtCond    BsmtExposure
##  Ex: 52    Ex:   3   BrkTil:146   Ex  :121   Fa  :  45   Av  :221    
##  Fa: 14    Fa:  28   CBlock:634   Fa  : 35   Gd  :  65   Gd  :134    
##  Gd:488    Gd: 146   PConc :647   Gd  :618   Po  :   2   Mn  :114    
##  TA:906    Po:   1   Slab  : 24   TA  :649   TA  :1311   No  :953    
##            TA:1282   Stone :  6   NA's: 37   NA's:  37   NA's: 38    
##                      Wood  :  3                                      
##                                                                      
##  BsmtFinType1   BsmtFinSF1     BsmtFinType2   BsmtFinSF2     
##  ALQ :220     Min.   :   0.0   ALQ :  19    Min.   :   0.00  
##  BLQ :148     1st Qu.:   0.0   BLQ :  33    1st Qu.:   0.00  
##  GLQ :418     Median : 383.5   GLQ :  14    Median :   0.00  
##  LwQ : 74     Mean   : 443.6   LwQ :  46    Mean   :  46.55  
##  Rec :133     3rd Qu.: 712.2   Rec :  54    3rd Qu.:   0.00  
##  Unf :430     Max.   :5644.0   Unf :1256    Max.   :1474.00  
##  NA's: 37                      NA's:  38                     
##    BsmtUnfSF       TotalBsmtSF      Heating     HeatingQC CentralAir
##  Min.   :   0.0   Min.   :   0.0   Floor:   1   Ex:741    N:  95    
##  1st Qu.: 223.0   1st Qu.: 795.8   GasA :1428   Fa: 49    Y:1365    
##  Median : 477.5   Median : 991.5   GasW :  18   Gd:241              
##  Mean   : 567.2   Mean   :1057.4   Grav :   7   Po:  1              
##  3rd Qu.: 808.0   3rd Qu.:1298.2   OthW :   2   TA:428              
##  Max.   :2336.0   Max.   :6110.0   Wall :   4                       
##                                                                     
##  Electrical     X1stFlrSF      X2ndFlrSF     LowQualFinSF    
##  FuseA:  94   Min.   : 334   Min.   :   0   Min.   :  0.000  
##  FuseF:  27   1st Qu.: 882   1st Qu.:   0   1st Qu.:  0.000  
##  FuseP:   3   Median :1087   Median :   0   Median :  0.000  
##  Mix  :   1   Mean   :1163   Mean   : 347   Mean   :  5.845  
##  SBrkr:1334   3rd Qu.:1391   3rd Qu.: 728   3rd Qu.:  0.000  
##  NA's :   1   Max.   :4692   Max.   :2065   Max.   :572.000  
##                                                              
##    GrLivArea     BsmtFullBath     BsmtHalfBath        FullBath    
##  Min.   : 334   Min.   :0.0000   Min.   :0.00000   Min.   :0.000  
##  1st Qu.:1130   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1.000  
##  Median :1464   Median :0.0000   Median :0.00000   Median :2.000  
##  Mean   :1515   Mean   :0.4253   Mean   :0.05753   Mean   :1.565  
##  3rd Qu.:1777   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:2.000  
##  Max.   :5642   Max.   :3.0000   Max.   :2.00000   Max.   :3.000  
##                                                                   
##     HalfBath       BedroomAbvGr    KitchenAbvGr   KitchenQual
##  Min.   :0.0000   Min.   :0.000   Min.   :0.000   Ex:100     
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.000   Fa: 39     
##  Median :0.0000   Median :3.000   Median :1.000   Gd:586     
##  Mean   :0.3829   Mean   :2.866   Mean   :1.047   TA:735     
##  3rd Qu.:1.0000   3rd Qu.:3.000   3rd Qu.:1.000              
##  Max.   :2.0000   Max.   :8.000   Max.   :3.000              
##                                                              
##   TotRmsAbvGrd    Functional    Fireplaces    FireplaceQu   GarageType 
##  Min.   : 2.000   Maj1:  14   Min.   :0.000   Ex  : 24    2Types :  6  
##  1st Qu.: 5.000   Maj2:   5   1st Qu.:0.000   Fa  : 33    Attchd :870  
##  Median : 6.000   Min1:  31   Median :1.000   Gd  :380    Basment: 19  
##  Mean   : 6.518   Min2:  34   Mean   :0.613   Po  : 20    BuiltIn: 88  
##  3rd Qu.: 7.000   Mod :  15   3rd Qu.:1.000   TA  :313    CarPort:  9  
##  Max.   :14.000   Sev :   1   Max.   :3.000   NA's:690    Detchd :387  
##                   Typ :1360                               NA's   : 81  
##   GarageYrBlt   GarageFinish   GarageCars      GarageArea     GarageQual 
##  Min.   :1900   Fin :352     Min.   :0.000   Min.   :   0.0   Ex  :   3  
##  1st Qu.:1961   RFn :422     1st Qu.:1.000   1st Qu.: 334.5   Fa  :  48  
##  Median :1980   Unf :605     Median :2.000   Median : 480.0   Gd  :  14  
##  Mean   :1979   NA's: 81     Mean   :1.767   Mean   : 473.0   Po  :   3  
##  3rd Qu.:2002                3rd Qu.:2.000   3rd Qu.: 576.0   TA  :1311  
##  Max.   :2010                Max.   :4.000   Max.   :1418.0   NA's:  81  
##  NA's   :81                                                              
##  GarageCond  PavedDrive   WoodDeckSF      OpenPorchSF     EnclosedPorch   
##  Ex  :   2   N:  90     Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  Fa  :  35   P:  30     1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.00  
##  Gd  :   9   Y:1340     Median :  0.00   Median : 25.00   Median :  0.00  
##  Po  :   7              Mean   : 94.24   Mean   : 46.66   Mean   : 21.95  
##  TA  :1326              3rd Qu.:168.00   3rd Qu.: 68.00   3rd Qu.:  0.00  
##  NA's:  81              Max.   :857.00   Max.   :547.00   Max.   :552.00  
##                                                                           
##    X3SsnPorch      ScreenPorch        PoolArea        PoolQC    
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Ex  :   2  
##  1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.:  0.000   Fa  :   2  
##  Median :  0.00   Median :  0.00   Median :  0.000   Gd  :   3  
##  Mean   :  3.41   Mean   : 15.06   Mean   :  2.759   NA's:1453  
##  3rd Qu.:  0.00   3rd Qu.:  0.00   3rd Qu.:  0.000              
##  Max.   :508.00   Max.   :480.00   Max.   :738.000              
##                                                                 
##    Fence      MiscFeature    MiscVal             MoSold      
##  GdPrv:  59   Gar2:   2   Min.   :    0.00   Min.   : 1.000  
##  GdWo :  54   Othr:   2   1st Qu.:    0.00   1st Qu.: 5.000  
##  MnPrv: 157   Shed:  49   Median :    0.00   Median : 6.000  
##  MnWw :  11   TenC:   1   Mean   :   43.49   Mean   : 6.322  
##  NA's :1179   NA's:1406   3rd Qu.:    0.00   3rd Qu.: 8.000  
##                           Max.   :15500.00   Max.   :12.000  
##                                                              
##      YrSold        SaleType    SaleCondition    SalePrice     
##  Min.   :2006   WD     :1267   Abnorml: 101   Min.   : 34900  
##  1st Qu.:2007   New    : 122   AdjLand:   4   1st Qu.:129975  
##  Median :2008   COD    :  43   Alloca :  12   Median :163000  
##  Mean   :2008   ConLD  :   9   Family :  20   Mean   :180921  
##  3rd Qu.:2009   ConLI  :   5   Normal :1198   3rd Qu.:214000  
##  Max.   :2010   ConLw  :   5   Partial: 125   Max.   :755000  
##                 (Other):   9
#plot(train_set$GrLivArea, train_set$SalePrice, main="Square Footage vs. Price", xlab="Square Footage", ylab="Sales Price") + 

Living Areas vs Sales Price

# Scatter plot of above-ground living area against sale price, with the y axis
# limited to 0-800,000 and a fitted linear-model trend line drawn in dark red.
ggplot(train_set, aes(x = GrLivArea, y = SalePrice)) +
  geom_point(color = "blue") +
  ylim(0, 800000) +
  geom_smooth(method = lm, color = "darkred")

# 100-bin histogram of the living-area variable itself. This section is about
# GrLivArea, so the histogram must use GrLivArea; plotting TotalBsmtSF here
# would only duplicate the basement histogram shown in the next section.
ggplot(train_set, aes(x = GrLivArea)) +
  geom_histogram(color = "blue", bins = 100)

Total Basement Square Footage vs Sales Price

# Total basement area against sale price: red points, y axis limited to
# 0-800,000, plus a fitted linear-model smoother.
ggplot(data = train_set, mapping = aes(TotalBsmtSF, SalePrice)) +
  geom_point(colour = "red") +
  ylim(0, 800000) +
  geom_smooth(method = lm)

# 100-bin histogram of total basement square footage.
ggplot(data = train_set, mapping = aes(TotalBsmtSF)) +
  geom_histogram(colour = "red", bins = 100)

Total Garage Area vs Sales Price

# Garage area against sale price: green points, y axis limited to
# 0-800,000, plus a fitted linear-model smoother.
ggplot(data = train_set, mapping = aes(GarageArea, SalePrice)) +
  geom_point(colour = "green") +
  ylim(0, 800000) +
  geom_smooth(method = lm)

# 100-bin histogram of garage area.
ggplot(data = train_set, mapping = aes(GarageArea)) +
  geom_histogram(colour = "green", bins = 100)

## Correlation Data and Plot

# Keep the three area predictors together with the response variable, then
# compute their pairwise correlation matrix and draw it.
correlation_set <- train_set[
  c("TotalBsmtSF", "GarageArea", "GrLivArea", "SalePrice")
]
cor.value <- cor(correlation_set)
corrplot.mixed(cor.value)

Model 1 - Comparing Square Area vs Sales Price

# Simple linear regression of sale price on above-ground living area.
model1 <- lm(formula = SalePrice ~ GrLivArea, data = train_set)
summary(model1)
## 
## Call:
## lm(formula = SalePrice ~ GrLivArea, data = train_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -462999  -29800   -1124   21957  339832 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18569.026   4480.755   4.144 3.61e-05 ***
## GrLivArea     107.130      2.794  38.348  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56070 on 1458 degrees of freedom
## Multiple R-squared:  0.5021, Adjusted R-squared:  0.5018 
## F-statistic:  1471 on 1 and 1458 DF,  p-value: < 2.2e-16

Model 2 - Comparing Total Basement Square Area vs Sales Price

# Simple linear regression of sale price on total basement square footage.
model2 <- lm(formula = SalePrice ~ TotalBsmtSF, data = train_set)
summary(model2)
## 
## Call:
## lm(formula = SalePrice ~ TotalBsmtSF, data = train_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -582310  -39612  -14095   33315  420018 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 63430.629   4286.892   14.80   <2e-16 ***
## TotalBsmtSF   111.110      3.745   29.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62750 on 1458 degrees of freedom
## Multiple R-squared:  0.3765, Adjusted R-squared:  0.3761 
## F-statistic: 880.3 on 1 and 1458 DF,  p-value: < 2.2e-16

Model 3 - Comparing Total Garage Area vs Sales Price

# Simple linear regression of sale price on garage area.
model3 <- lm(formula = SalePrice ~ GarageArea, data = train_set)
summary(model3)
## 
## Call:
## lm(formula = SalePrice ~ GarageArea, data = train_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -279451  -33024   -5045   24479  490913 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 71357.421   3949.003   18.07   <2e-16 ***
## GarageArea    231.646      7.608   30.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 62140 on 1458 degrees of freedom
## Multiple R-squared:  0.3887, Adjusted R-squared:  0.3882 
## F-statistic:   927 on 1 and 1458 DF,  p-value: < 2.2e-16

Hypothesis Testing

Square Area vs Sales Price

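The correlation between above-ground living area (GrLivArea) and the sale price is about 71%. Of the three quantitative variables examined, this one has the strongest correlation with the target variable. Because the p-value is below 2.2e-16, we reject the null hypothesis and conclude that the true correlation is not equal to 0.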

# Correlation test between living area and sale price, reporting an 80%
# confidence interval for the correlation.
cor.test(~ GrLivArea + SalePrice, data = train_set, conf.level = 0.80)
## 
##  Pearson's product-moment correlation
## 
## data:  GrLivArea and SalePrice
## t = 38.348, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  0.6915087 0.7249450
## sample estimates:
##       cor 
## 0.7086245

Total Basement SF vs Sales Price

The correlation between total basement square footage and the sale price is only about 61%. Because the p-value is below 2.2e-16, we conclude that the true correlation is not equal to 0.

# Correlation test between total basement area and sale price, reporting an
# 80% confidence interval for the correlation.
cor.test(~ TotalBsmtSF + SalePrice, data = train_set, conf.level = 0.80)
## 
##  Pearson's product-moment correlation
## 
## data:  TotalBsmtSF and SalePrice
## t = 29.671, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  0.5922142 0.6340846
## sample estimates:
##       cor 
## 0.6135806

Garage SF vs Sales Price

The correlation between garage square footage and the sale price is only about 62%. Because the p-value is below 2.2e-16, we conclude that the true correlation is not equal to 0.

# Correlation test between garage area and sale price, reporting an 80%
# confidence interval for the correlation.
cor.test(~ GarageArea + SalePrice, data = train_set, conf.level = 0.80)
## 
##  Pearson's product-moment correlation
## 
## data:  GarageArea and SalePrice
## t = 30.446, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  0.6024756 0.6435283
## sample estimates:
##       cor 
## 0.6234314

Analysis and familywise errors

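All three correlations are moderately strong (0.61 to 0.71), and every test returns a p-value below 2.2e-16. Running three tests at the 80% confidence level (α = 0.20 each) could in principle push the familywise error rate as high as 1 − 0.8³ ≈ 0.49, but p-values this small remain significant even after a Bonferroni correction (α/3 ≈ 0.067). I would therefore not be concerned with familywise errors (making Type 1 errors) here.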

Linear Algebra

5 points. Linear Algebra and Correlation. Invert your correlation matrix from above. (This is known as the precision matrix and contains variance inflation factors on the diagonal.) Multiply the correlation matrix by the precision matrix, and then multiply the precision matrix by the correlation matrix. Conduct LU decomposition on the matrix.

Inverting the matrix using the R built-in solve function

# Invert the correlation matrix with base R's solve(); the exercise refers to
# this inverse as the precision matrix.
precession.matrix <- solve(a = cor.value)
print(precession.matrix)
##             TotalBsmtSF  GarageArea   GrLivArea  SalePrice
## TotalBsmtSF  1.65207569 -0.27914378 -0.05133909 -0.8032744
## GarageArea  -0.27914378  1.68692624 -0.08097502 -0.8230248
## GrLivArea   -0.05133909 -0.08097502  2.01512843 -1.3459863
## SalePrice   -0.80327436 -0.82302478 -1.34598629  2.9597719

Multiply the correlation matrix by the precision matrix

# Multiply the correlation matrix by the precision matrix. Rounding to four
# decimals removes floating-point noise so the identity matrix is visible.
corr.times.precession <- cor.value %*% precession.matrix
round(corr.times.precession, 4)
##             TotalBsmtSF GarageArea GrLivArea SalePrice
## TotalBsmtSF           1          0         0         0
## GarageArea            0          1         0         0
## GrLivArea             0          0         1         0
## SalePrice             0          0         0         1
# Multiply the precision matrix by the correlation matrix (the reverse order,
# which the exercise also asks for). A matrix and its inverse commute, so this
# product is the identity matrix as well.
precession.times.corr <- precession.matrix %*% cor.value
round(precession.times.corr, 4)
##             TotalBsmtSF GarageArea GrLivArea SalePrice
## TotalBsmtSF           1          0         0         0
## GarageArea            0          1         0         0
## GrLivArea             0          0         1         0
## SalePrice             0          0         0         1

Conduct LU decomposition on the matrix.

# Factor the correlation matrix into lower (L) and upper (U) triangular parts
# using lu.decomposition() from the matrixcalc package.
library(matrixcalc)
LU <- lu.decomposition(cor.value)
# Lower-triangular factor
LU[["L"]]
##           [,1]      [,2]      [,3] [,4]
## [1,] 1.0000000 0.0000000 0.0000000    0
## [2,] 0.4866655 1.0000000 0.0000000    0
## [3,] 0.4548682 0.3244797 1.0000000    0
## [4,] 0.6135806 0.4256308 0.4547601    1
# Upper-triangular factor
LU[["U"]]
##      [,1]      [,2]      [,3]      [,4]
## [1,]    1 0.4866655 0.4548682 0.6135806
## [2,]    0 0.7631567 0.2476288 0.3248230
## [3,]    0 0.0000000 0.7127444 0.3241277
## [4,]    0 0.0000000 0.0000000 0.3378639
# Print the original matrix, then the product L %*% U, to confirm that the
# factors reproduce it
cor.value
##             TotalBsmtSF GarageArea GrLivArea SalePrice
## TotalBsmtSF   1.0000000  0.4866655 0.4548682 0.6135806
## GarageArea    0.4866655  1.0000000 0.4689975 0.6234314
## GrLivArea     0.4548682  0.4689975 1.0000000 0.7086245
## SalePrice     0.6135806  0.6234314 0.7086245 1.0000000
LU[["L"]] %*% LU[["U"]]
##           [,1]      [,2]      [,3]      [,4]
## [1,] 1.0000000 0.4866655 0.4548682 0.6135806
## [2,] 0.4866655 1.0000000 0.4689975 0.6234314
## [3,] 0.4548682 0.4689975 1.0000000 0.7086245
## [4,] 0.6135806 0.6234314 0.7086245 1.0000000
# Element-wise comparison after rounding both sides to four decimals
round(LU[["L"]] %*% LU[["U"]], 4) == round(cor.value, 4)
##             TotalBsmtSF GarageArea GrLivArea SalePrice
## TotalBsmtSF        TRUE       TRUE      TRUE      TRUE
## GarageArea         TRUE       TRUE      TRUE      TRUE
## GrLivArea          TRUE       TRUE      TRUE      TRUE
## SalePrice          TRUE       TRUE      TRUE      TRUE

5 points. Calculus-Based Probability & Statistics. Many times, it makes sense to fit a closed form distribution to data. Select a variable in the Kaggle.com training dataset that is skewed to the right, shift it so that the minimum value is absolutely above zero if necessary. Then load the MASS package and run fitdistr to fit an exponential probability density function. (See https://stat.ethz.ch/R-manual/R-devel/library/MASS/html/fitdistr.html ). Find the optimal value of \(\lambda\) for this distribution, and then take 1000 samples from this exponential distribution using this value (e.g., rexp(1000, \(\lambda\))). Plot a histogram and compare it with a histogram of your original variable. Using the exponential pdf, find the 5th and 95th percentiles using the cumulative distribution function (CDF). Also generate a 95% confidence interval from the empirical data, assuming normality. Finally, provide the empirical 5th percentile and 95th percentile of the data. Discuss.

For this exercise, I decided to choose garage area, as it is slightly skewed to the right. I installed the moments library because it provides a function to measure skewness. In this case the skewness is 0.18, which implies a slight right skew.

# Attach MASS (provides fitdistr) and moments (provides skewness)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(moments) # https://cran.r-project.org/web/packages/moments/moments.pdf
# Measure the skewness of the garage-area column
moments::skewness(x = train_set$GarageArea)
## [1] 0.1797959
# Fit an exponential density to garage area with fitdistr and show the
# estimated rate parameter
lamda.rate <- fitdistr(x = train_set$GarageArea, densfun = "exponential")
lamda.rate[["estimate"]]
##        rate 
## 0.002114254
# Draw 1000 samples from an exponential distribution with the fitted rate
lamda.thousand.samples <- rexp(n = 1000, rate = lamda.rate$estimate)
# The histograms comparing these samples with the original variable follow.

Histogram of Samples Drawn Using the Optimal Lambda

# 100-bin histogram of the simulated exponential samples. labs() matches labels
# by argument name, so the title has to be passed as `title =`; an unnamed
# string is not applied as the plot title.
ggplot(
  as.data.frame(lamda.thousand.samples),
  aes(x = lamda.thousand.samples)
) +
  geom_histogram(color = "blue", bins = 100) +
  labs(title = "Lamda Histogram")

Histogram of Garage data

 # 100-bin histogram of the observed garage areas, for comparison with the
 # simulated exponential samples. The title is passed as `title =` (labs()
 # matches labels by argument name) and names the variable actually plotted.
 ggplot(train_set, aes(x = GarageArea)) +
   geom_histogram(color = "green", bins = 100) +
   labs(title = "Garage Area Histogram")

Using the exponential pdf, find the 5th and 95th percentiles using the cumulative distribution function (CDF).

I calculated the formula for CI manually as many of the libraries did not work on my version of R.

# Using the exponential pdf, find the 5th and 95th percentiles using the cumulative distribution function (CDF).
# NOTE(review): this call takes the 5% and 95% quantiles of the empirical CDF
# of the simulated draws in lamda.thousand.samples, not of the fitted
# exponential distribution itself. qexp(c(0.05, 0.95), rate = lamda.rate$estimate)
# would give the closed-form percentiles -- confirm which one is intended.
quantile(ecdf(lamda.thousand.samples), c(0.05,0.95))
##         5%        95% 
##   26.63478 1421.46595
# 95% confidence interval for the mean garage area under a normal
# approximation: the sample mean plus or minus 1.96 standard errors.
se <- with(train_set, sd(GarageArea) / sqrt(length(GarageArea)))

lower.bound <- with(train_set, mean(GarageArea)) - 1.96 * se
upper.bound <- with(train_set, mean(GarageArea)) + 1.96 * se
c(lower.bound, upper.bound)
## [1] 462.0129 483.9474
# Empirical 5th and 95th percentiles of the observed garage areas
quantile(x = train_set$GarageArea, probs = c(0.05, 0.95))
##    5%   95% 
##   0.0 850.1

10 points. Modeling. Build some type of multiple regression model and submit your model to the competition board. Provide your complete model summary and results with analysis. Report your Kaggle.com user name and score.

library(MASS)
library(dplyr)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
# Fit the full model
# Multiple linear regression of sale price on the selected predictors.
# CentralAir is listed once: repeating a term in a formula does not change the
# fitted model.
full.model <- lm(
  SalePrice ~ CentralAir + GrLivArea + Fireplaces + GarageCars +
    GarageFinish + WoodDeckSF + Heating + TotalBsmtSF + OverallCond +
    BldgType + Utilities + LotShape + LandSlope + HouseStyle + Neighborhood,
  data = train_set
)

# Prediction (Linear Prediction)
# Score the test set with its first column dropped (presumably Id -- confirm).
prediction <- predict(full.model, newdata = test_set[, -1])
prediction.df <- data.frame(Id = test_set$Id, SalePrice = prediction)

# Predictions that come back as NA are replaced with the median training sale
# price (163000 according to the summary of train_set), computed from the data
# rather than hard-coded.
prediction.df <- prediction.df %>%
  mutate(
    SalePrice = replace_na(
      SalePrice,
      median(train_set$SalePrice, na.rm = TRUE)
    )
  )
write.csv(prediction.df, file = "prediction_linear.csv", row.names = FALSE)

Random Forest

YouTube Video:

https://www.youtube.com/watch?v=J1gxjLpCVt4

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:gridExtra':
## 
##     combine
library(randomForestExplainer)

# Random-forest model of sale price on the same predictors as the linear
# model, fitted on train_set without its first column. The formula is kept
# exactly as written so that the printed Call matches.
forest <- randomForest(
  SalePrice ~ CentralAir + GrLivArea + Fireplaces + GarageCars +
    GarageFinish + WoodDeckSF + Heating + CentralAir + TotalBsmtSF +
    OverallCond + BldgType + Utilities + LotShape + LandSlope + HouseStyle +
    Neighborhood,
  data = train_set[, -1],
  localImp = TRUE,
  na.action = na.roughfix
)
print(forest)
## 
## Call:
##  randomForest(formula = SalePrice ~ CentralAir + GrLivArea + Fireplaces +      GarageCars + GarageFinish + WoodDeckSF + Heating + CentralAir +      TotalBsmtSF + OverallCond + BldgType + Utilities + LotShape +      LandSlope + HouseStyle + Neighborhood, data = train_set[,      -1], localImp = TRUE, na.action = na.roughfix) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 5
## 
##           Mean of squared residuals: 903863289
##                     % Var explained: 85.67
#forest$predicted
# Variable-importance table for the fitted forest
randomForest::importance(forest)
##                %IncMSE IncNodePurity
## CentralAir   16.117915  5.347314e+10
## GrLivArea    50.822620  2.121055e+12
## Fireplaces   15.712075  3.189821e+11
## GarageCars   27.075063  1.455789e+12
## GarageFinish 23.539672  5.179643e+11
## WoodDeckSF    8.231356  2.220690e+11
## Heating      -3.439319  7.980295e+09
## TotalBsmtSF  35.436334  1.472188e+12
## OverallCond  17.789019  1.265622e+11
## BldgType     18.990226  8.730312e+10
## Utilities     0.000000  5.583130e+08
## LotShape      6.724943  8.164638e+10
## LandSlope     3.030324  3.477353e+10
## HouseStyle   19.615356  1.799720e+11
## Neighborhood 49.131628  2.351230e+12
# The same importance measures as a plot
randomForest::varImpPlot(forest)

# Give the test set a placeholder SalePrice so it has the same columns as the
# training set, then stack the two (presumably so the test rows share the
# training data's factor levels -- TODO confirm).
test_set$SalePrice <- 0
new_data <- rbind(train_set, test_set)
# Take the stacked test rows back out. They are the last nrow(test_set) rows of
# new_data, so select them by position instead of comparing the first column
# against a hard-coded 1460.
n.d <- tail(new_data, nrow(test_set))
# Predict with the forest on the test rows, without their first column.
prediction2.df <- predict(forest, n.d[, -1])
prediction2.df
##      1461      1462      1463      1464      1465      1466      1467 
## 121887.02 150420.97 184256.57 186023.14 196019.92 182497.47 174420.89 
##      1468      1469      1470      1471      1472      1473      1474 
## 176564.26 174338.52 127943.66 168396.87  98753.81  89960.66 150878.64 
##      1475      1476      1477      1478      1479      1480      1481 
## 128541.59 357953.92 245883.56 327831.42 299257.41 492561.40 380048.47 
##      1482      1483      1484      1485      1486      1487      1488 
## 199146.53 178420.91 162906.56 166348.87 202811.63 369071.25 239098.19 
##      1489      1490      1491      1492      1493      1494      1495 
## 191604.44 223105.22 185957.51 111782.80 175939.91 299490.76 292889.73 
##      1496      1497      1498      1499      1500      1501      1502 
## 244917.29 199322.17 150674.04 151371.87 159237.16 175334.30 151580.48 
##      1503      1504      1505      1506      1507      1508      1509 
## 249354.45 237142.40 241997.84 202171.79 242153.67 197944.20 178337.71 
##      1510      1511      1512      1513      1514      1515      1516 
## 158857.88 160057.37 167772.78 147350.52        NA 208644.13 154962.53 
##      1517      1518      1519      1520      1521      1522      1523 
## 147585.66 138553.70 172617.55 139872.50 148298.24 162768.96 124001.95 
##      1524      1525      1526      1527      1528      1529      1530 
## 124447.79 128348.65 126359.28 118391.00 132186.45 152306.19 169068.26 
##      1531      1532      1533      1534      1535      1536      1537 
## 144142.67        NA 141776.20 112785.01 146158.81 114721.19  88742.52 
##      1538      1539      1540      1541      1542      1543      1544 
## 133231.72 153475.78        NA 139829.89 145340.32 179563.57  93080.36 
##      1545      1546      1547      1548      1549      1550      1551 
## 108318.33 125852.80 136366.25 135254.42 127337.53 156469.03 122125.55 
##      1552      1553      1554      1555      1556      1557      1558 
## 155119.35        NA 121873.13 187376.49 133164.86        NA  90322.29 
##      1559      1560      1561      1562      1563      1564      1565 
##        NA 158978.83        NA 120906.22 116411.64 165277.42 156839.89 
##      1566      1567      1568      1569      1570      1571      1572 
## 221226.82  96081.74 235798.96 144224.41 111735.63 131248.09 144789.17 
##      1573      1574      1575      1576      1577      1578      1579 
## 184075.03 110828.54 188056.52 231677.55 185964.30 159340.25 136719.32 
##      1580      1581      1582      1583      1584      1585      1586 
## 195836.41 139711.31 128659.93 299148.02 245175.30 139472.52  76591.07 
##      1587      1588      1589      1590      1591      1592      1593 
##  94635.71 147657.28 104062.72 125441.23        NA 134116.81 131211.90 
##      1594      1595      1596      1597      1598      1599      1600 
##        NA        NA 232188.53 182590.67 179612.22 147680.58 165805.85 
##      1601      1602      1603      1604      1605      1606      1607 
##  86608.29  99440.43 114073.01 281455.50 188517.89 168184.56 176155.65 
##      1608      1609      1610      1611      1612      1613      1614 
## 211828.62 185280.45 176401.07 148178.06 175246.53 165974.62 124859.48 
##      1615      1616      1617      1618      1619      1620      1621 
##        NA        NA  89788.53 124589.66 129105.03 170898.82 138412.76 
##      1622      1623      1624      1625      1626      1627      1628 
## 153673.13 203597.69 195252.53 112908.77 169587.89 184654.19 242328.82 
##      1629      1630      1631      1632      1633      1634      1635 
## 174794.24 333289.16 239562.58 250168.47 170987.52 179880.02 175807.73 
##      1636      1637      1638      1639      1640      1641      1642 
## 163067.51 206577.73 207776.95 195479.12 251835.45 188428.23 232813.00 
##      1643      1644      1645      1646      1647      1648      1649 
## 239841.92 233725.08 205420.64 161766.51 160043.63 150606.17 134893.90 
##      1650      1651      1652      1653      1654      1655      1656 
## 112017.46 120023.82  92483.97  98605.60 162415.91 137012.08 149451.94 
##      1657      1658      1659      1660      1661      1662      1663 
## 152585.07 161400.08 128854.83 144869.04 448187.10 361942.91 351744.26 
##      1664      1665      1666      1667      1668      1669      1670 
## 492289.22 336008.06 341445.93 346323.76 310479.56 279771.72 342613.15 
##      1671      1672      1673      1674      1675      1676      1677 
## 290610.02 484103.04 310913.36 274214.47 209841.73 204222.26 209715.79 
##      1678      1679      1680      1681      1682      1683      1684 
## 461259.05 431746.74 312120.62 227904.77 319963.74 196887.76 179171.52 
##      1685      1686      1687      1688      1689      1690      1691 
## 177516.98 174544.73 166348.87 183624.61 187972.19 178098.34 169935.43 
##      1692      1693      1694      1695      1696      1697      1698 
## 236386.15 164908.21 178787.03 170330.09 283199.34 169742.02 379943.10 
##      1699      1700      1701      1702      1703      1704      1705 
## 410049.12 262999.28 267826.42 255002.46 272244.42 261592.82 221662.66 
##      1706      1707      1708      1709      1710      1711      1712 
## 406036.40 210713.58 202813.77 249903.55 212018.41 250357.21 271381.95 
##      1713      1714      1715      1716      1717      1718      1719 
## 268396.17 239605.90 205443.43 174539.52 163699.52        NA 200631.63 
##      1720      1721      1722      1723      1724      1725      1726 
## 227535.83 154538.90        NA 153733.43 197635.08 219291.77 197459.97 
##      1727      1728      1729      1730      1731      1732      1733 
## 176958.63 175932.54 163982.79 167933.54 132814.62 123262.06 113290.59 
##      1734      1735      1736      1737      1738      1739      1740 
## 130449.71 130343.34 124009.12 364049.65 221899.51 330726.10 296565.91 
##      1741      1742      1743      1744      1745      1746      1747 
## 186078.05 175334.30 173016.35 268736.65 254427.10 208590.13 204357.14 
##      1748      1749      1750      1751      1752      1753      1754 
## 232930.74 170400.37 152675.39 235246.41 122670.13 163312.16 222039.60 
##      1755      1756      1757      1758      1759      1760      1761 
## 168612.82 129609.58 122560.30 164952.04 163737.79 163944.14 160734.11 
##      1762      1763      1764      1765      1766      1767      1768 
## 199545.74 177058.46 125659.02 152666.22 172437.74 181295.21 146874.62 
##      1769      1770      1771      1772      1773      1774      1775 
## 159321.72 137401.65 145976.19 149186.65 147407.49 148850.27 131973.59 
##      1776      1777      1778      1779      1780      1781      1782 
## 118492.84 116261.57 133726.63 126166.83 162720.12 148746.59 107858.03 
##      1783      1784      1785      1786      1787      1788      1789 
## 129185.26 103741.88 116481.13 195790.62 144644.95        NA 108185.33 
##      1790      1791      1792      1793      1794      1795      1796 
##  86739.61 203475.96 146590.71 135499.35 141981.04 139048.26 127317.08 
##      1797      1798      1799      1800      1801      1802      1803 
## 118920.19 133259.24 114162.46 136222.00 132799.83 148886.12 146137.24 
##      1804      1805      1806      1807      1808      1809      1810 
## 140442.22 135578.22 120204.49 123663.42 116280.79        NA 155966.62 
##      1811      1812      1813      1814      1815      1816      1817 
##        NA        NA 116489.40 109814.83  70448.83  97775.68 128435.33 
##      1818      1819      1820      1821      1822      1823      1824 
## 146176.79 123484.12        NA 128671.48 141911.01        NA 137967.84 
##      1825      1826      1827      1828      1829      1830      1831 
## 144353.50 112170.35 114095.03 143787.39 134139.06 139421.43 142282.17 
##      1832      1833      1834      1835      1836      1837      1838 
##        NA 142860.17 133506.34        NA 148604.36        NA 133358.62 
##      1839      1840      1841      1842      1843      1844      1845 
## 110662.84        NA 136224.59  83240.91 137272.53 139737.61 150011.80 
##      1846      1847      1848      1849      1850      1851      1852 
## 153531.56 197345.99        NA 117891.37 120104.98 147090.95 137914.78 
##      1853      1854      1855      1856      1857      1858      1859 
## 128190.69 151726.29 152011.22 217251.76 189422.12 151819.69 124937.89 
##      1860      1861      1862      1863      1864      1865      1866 
## 154776.45 125685.33 297486.12 297486.12 297486.12 273961.86 285566.29 
##      1867      1868      1869      1870      1871      1872      1873 
## 237064.08 232408.35 190170.08 205081.31 244503.98 174013.00 229375.36 
##      1874      1875      1876      1877      1878      1879      1880 
## 138309.50 200663.11 210657.59 197269.78 213650.42 122630.09 137870.87 
##      1881      1882      1883      1884      1885      1886      1887 
## 242585.84 242872.00 198832.19 222120.68 207508.23 238383.53 192818.86 
##      1888      1889      1890      1891      1892      1893      1894 
## 238976.13 168546.90 129825.04 139176.50 107772.57 139365.00        NA 
##      1895      1896      1897      1898      1899      1900      1901 
## 141386.27 122780.03 127864.56 139772.63 159646.08 155117.86 204018.17 
##      1902      1903      1904      1905      1906      1907      1908 
## 149997.70 211319.38 149654.33 237127.48 174201.51 237436.13 141428.31 
##      1909      1910      1911      1912      1913      1914      1915 
## 140138.98 141124.73 200679.10 279910.58 159942.41  70343.24 292814.37 
##      1916      1917      1918      1919      1920      1921      1922 
##        NA 241963.59 135773.44 162995.75 166063.92 367288.83 256577.35 
##      1923      1924      1925      1926      1927      1928      1929 
## 228739.54 233148.09 220731.20 355272.07 133741.11 163266.82 129129.27 
##      1930      1931      1932      1933      1934      1935      1936 
## 139582.63 135351.18 139807.46 179388.24 178427.26 176535.49 182104.76 
##      1937      1938      1939      1940      1941      1942      1943 
## 184648.73 172068.85 267811.44 196019.92 178340.66 190133.76 196610.79 
##      1944      1945      1946      1947      1948      1949      1950 
## 335797.95 366350.75        NA 281676.92 196472.49 201429.34 166305.20 
##      1951      1952      1953      1954      1955      1956      1957 
## 246160.75 231305.18 167458.92 194036.23 157346.18 315357.26 172537.91 
##      1958      1959      1960      1961      1962      1963      1964 
## 248579.49 139331.75 123679.87 121701.06  90220.91 105043.86 109474.99 
##      1965      1966      1967      1968      1969      1970      1971 
## 158509.17 144766.35 277859.07 451551.91 385456.19 450864.43 423469.13 
##      1972      1973      1974      1975      1976      1977      1978 
## 351127.79 279044.12 328988.00 499425.85 290505.98 355026.69 436372.71 
##      1979      1980      1981      1982      1983      1984      1985 
## 341839.03 199000.10 317840.62 210831.55 201134.51 171114.97 208466.38 
##      1986      1987      1988      1989      1990      1991      1992 
## 192808.46 169344.02 178954.09 188250.36 214336.12 210677.44 204158.98 
##      1993      1994      1995      1996      1997      1998      1999 
## 176813.05 234083.26 180645.45 248261.36 300151.43 367040.41 271814.52 
##      2000      2001      2002      2003      2004      2005      2006 
## 303488.00 304639.33 232785.91 254027.47 254729.31 217018.73 209891.72 
##      2007      2008      2009      2010      2011      2012      2013 
## 280539.41 197388.85 189427.22 186500.91        NA 166268.83 177561.30 
##      2014      2015      2016      2017      2018      2019      2020 
## 192841.32 187229.56 191864.43 194762.40 130956.28 121968.35 108533.63 
##      2021      2022      2023      2024      2025      2026      2027 
## 113367.77 204284.15 150506.47 282261.47 329405.86 192798.54 150674.04 
##      2028      2029      2030      2031      2032      2033      2034 
## 157090.61 177355.07 274345.25 202777.86 230773.68 250111.76 160273.28 
##      2035      2036      2037      2038      2039      2040      2041 
## 221335.93 186260.60 185883.31 241796.66 199887.02 305338.78 236499.71 
##      2042      2043      2044      2045      2046      2047      2048 
## 230175.38 179172.37 210900.91 190163.85 150897.41 148691.80 135985.84 
##      2049      2050      2051      2052      2053      2054      2055 
## 142161.37 199281.40 116431.16 148875.82 145872.99  98941.48 164735.66 
##      2056      2057      2058      2059      2060      2061      2062 
## 137303.63 131701.91 199046.33 137787.84 154066.01 181420.54 138162.37 
##      2063      2064      2065      2066      2067      2068      2069 
## 114259.41 148306.19 120539.76 176909.20 146017.63 146316.74  81078.97 
##      2070      2071      2072      2073      2074      2075      2076 
## 113471.26  96142.97 150035.87 144549.37 164078.31 135019.91 125184.75 
##      2077      2078      2079      2080      2081      2082      2083 
## 150027.91 127458.11 132902.84 131236.35 123659.86        NA 145966.05 
##      2084      2085      2086      2087      2088      2089      2090 
## 121967.54 134165.10  86639.73 122535.83 107988.74  87315.76 131362.53 
##      2091      2092      2093      2094      2095      2096      2097 
##        NA 141177.55 132316.18        NA 143595.44  80902.53        NA 
##      2098      2099      2100      2101      2102      2103      2104 
## 155766.87  70120.21        NA 128141.42 128790.98 100932.66 125815.06 
##      2105      2106      2107      2108      2109      2110      2111 
##        NA  92381.47 216928.16 109425.67 106493.46 133324.04 146243.98 
##      2112      2113      2114      2115      2116      2117      2118 
## 147494.35 121529.38 122169.22 148116.44 127021.26 141069.73 122135.37 
##      2119      2120      2121      2122      2123      2124      2125 
## 120187.49 124528.70        NA 115283.68  79558.11 175789.95 140530.24 
##      2126      2127      2128      2129      2130      2131      2132 
## 148202.99        NA 131785.14 106288.55 136497.03 182625.48 120500.14 
##      2133      2134      2135      2136      2137      2138      2139 
## 123853.57 125200.03 103080.53        NA 121186.17 136813.91 138153.19 
##      2140      2141      2142      2143      2144      2145      2146 
## 137626.18 146029.15 139820.81 142652.02 127458.55 135317.56 140571.87 
##      2147      2148      2149      2150      2151      2152      2153 
## 186076.21 132281.67 141190.92 230221.25 105637.05        NA 161407.10 
##      2154      2155      2156      2157      2158      2159      2160 
##        NA 118077.96 265788.88 220798.31 218768.59 199775.05 166434.90 
##      2161      2162      2163      2164      2165      2166      2167 
## 258288.86 328063.14 310674.06 225478.45 204518.66 142989.11 203236.44 
##      2168      2169      2170      2171      2172      2173      2174 
## 206486.30 189289.55 215691.85 150695.84 152214.52 191043.03 246862.45 
##      2175      2176      2177      2178      2179      2180      2181 
## 270990.43 282572.33 254113.47 197314.94 142191.75 214489.19 191562.20 
##      2182      2183      2184      2185      2186      2187      2188 
## 223904.67 190898.92 128265.48 130460.51 136825.08 151772.23 153187.55 
##      2189      2190      2191      2192      2193      2194      2195 
## 234799.38        NA        NA        NA        NA        NA 113170.68 
##      2196      2197      2198      2199      2200      2201      2202 
## 113075.05 120843.99 153565.42 168418.51 154123.08 151597.93 190635.21 
##      2203      2204      2205      2206      2207      2208      2209 
## 181572.02 215489.17 148920.81 149679.60 208984.83 246946.78 239732.35 
##      2210      2211      2212      2213      2214      2215      2216 
## 140090.72 114696.50 111455.51        NA 142806.87 104742.50 168428.24 
##      2217      2218      2219      2220      2221      2222      2223 
##  70404.85 100476.31  88972.15 100819.36 292814.37 281455.50 253496.06 
##      2224      2225      2226      2227      2228      2229      2230 
## 194112.12 134153.84 194013.83 210341.38 235059.33 210402.83 174373.22 
##      2231      2232      2233      2234      2235      2236      2237 
## 216702.62 215902.59 180032.98 205229.89 231895.98 266760.32 338038.12 
##      2238      2239      2240      2241      2242      2243      2244 
## 190470.00        NA 155844.95 175903.02 134549.44 134175.35 105453.08 
##      2245      2246      2247      2248      2249      2250      2251 
## 115668.67 141669.09        NA 124141.08 123268.84 128544.64 130518.73 
##      2252      2253      2254      2255      2256      2257      2258 
## 169410.18 163819.31 181357.44 191517.02 174678.61 228799.36 172171.34 
##      2259      2260      2261      2262      2263      2264      2265 
## 184714.93 164590.98 181474.03 194332.61 329136.93 524597.24 176885.85 
##      2266      2267      2268      2269      2270      2271      2272 
## 262437.45 333589.49 356574.01 161943.67 175991.93 206775.40 202721.26 
##      2273      2274      2275      2276      2277      2278      2279 
## 166581.97 170253.91 173822.11 173136.14 198642.06 148827.02 120981.01 
##      2280      2281      2282      2283      2284      2285      2286 
## 128113.08 167064.28 169496.45  97590.59 109367.53 139688.97 129152.90 
##      2287      2288      2289      2290      2291      2292      2293 
## 330274.32 306860.93 401759.30 426935.71 315993.87 402414.73 498650.35 
##      2294      2295      2296      2297      2298      2299      2300 
## 437119.60 449560.43 266852.04 305668.30 320701.65 453595.86 333941.84 
##      2301      2302      2303      2304      2305      2306      2307 
## 271068.74 252091.97 264357.50 267138.92 200249.67 200249.67 199059.88 
##      2308      2309      2310      2311      2312      2313      2314 
## 226903.92 258524.83 212219.03 199638.15 172986.36 180368.24 173015.30 
##      2315      2316      2317      2318      2319      2320      2321 
## 178082.59 187294.40 181325.65 177929.62 176099.95 173456.14 203687.48 
##      2322      2323      2324      2325      2326      2327      2328 
## 170533.94 166940.92 166348.87 206152.39 166235.11 212040.89 220567.12 
##      2329      2330      2331      2332      2333      2334      2335 
## 181906.98 185685.41 424227.81 407879.00 331488.24 300024.39 262534.41 
##      2336      2337      2338      2339      2340      2341      2342 
## 299758.29 188950.79 242429.37 248657.96 334238.08 217579.91 238292.55 
##      2343      2344      2345      2346      2347      2348      2349 
## 241317.58 252845.83 244956.33 206818.76 205110.52 223712.50 180058.51 
##      2350      2351      2352      2353      2354      2355      2356 
## 248619.29 252410.04 282919.98 289768.11        NA        NA 150641.44 
##      2357      2358      2359      2360      2361      2362      2363 
## 190538.99 201577.46 146198.84 123336.27 140002.20 257761.46 137151.68 
##      2364      2365      2366      2367      2368      2369      2370 
## 146697.94 209169.58 176277.15 240331.82 206145.43 237307.24 175168.60 
##      2371      2372      2373      2374      2375      2376      2377 
## 177406.09 194801.46 235020.02 274700.98 246981.78 286493.25 322605.72 
##      2378      2379      2380      2381      2382      2383      2384 
## 157192.91 224295.87 154171.64 168797.96 189004.27 207604.30 216449.79 
##      2385      2386      2387      2388      2389      2390      2391 
## 164275.47 143264.35 133692.04 108966.28 133718.45 147772.08 145319.30 
##      2392      2393      2394      2395      2396      2397      2398 
## 121578.77 158225.67 148760.75 202932.22 147262.68 185885.92 130039.64 
##      2399      2400      2401      2402      2403      2404      2405 
##        NA        NA 114377.03 139721.41 135748.89 155140.24 164906.99 
##      2406      2407      2408      2409      2410      2411      2412 
## 127887.15 134382.24 147886.38 140045.35 171538.38 118065.61 152600.28 
##      2413      2414      2415      2416      2417      2418      2419 
## 147368.03 131959.21 139046.91 133447.82 135000.85 131653.62 134815.69 
##      2420      2421      2422      2423      2424      2425      2426 
## 126040.94 161631.40 110067.28        NA 153131.46 204040.72 130986.89 
##      2427      2428      2429      2430      2431      2432      2433 
##        NA 152742.95 121251.85 129860.43 113082.23 135545.15 143914.45 
##      2434      2435      2436      2437      2438      2439      2440 
## 149157.06 150811.14 114480.37 116796.57 129439.41 123593.56 118675.99 
##      2441      2442      2443      2444      2445      2446      2447 
## 105233.74 114013.86 124023.24 123505.40  92113.18 151244.28 144418.42 
##      2448      2449      2450      2451      2452      2453      2454 
## 142143.91 124697.48 146110.52 130100.41 176615.26  86657.96 134166.21 
##      2455      2456      2457      2458      2459      2460      2461 
## 102980.69 137309.54 121777.31 123991.87 118092.39 143830.12 125468.76 
##      2462      2463      2464      2465      2466      2467      2468 
## 136892.06 125210.45 155745.89 134057.26 124468.99 125396.49  87767.90 
##      2469      2470      2471      2472      2473      2474      2475 
##  95072.75 185081.58 186420.04 185916.55 128867.41 111236.97 200902.89 
##      2476      2477      2478      2479      2480      2481      2482 
## 118881.57 128623.63 162700.17 114145.84 144914.45 125350.60 121863.83 
##      2483      2484      2485      2486      2487      2488      2489 
## 121792.20 129974.12 132540.62 147705.75 189226.74 137366.52 140730.02 
##      2490      2491      2492      2493      2494      2495      2496 
## 147236.74 101148.08 198576.84 131560.67 153354.20  93263.42 246737.00 
##      2497      2498      2499      2500      2501      2502      2503 
## 141274.98 118804.98  83053.55 127438.71 134021.28 132060.37 117056.67 
##      2504      2505      2506      2507      2508      2509      2510 
## 191979.77 208581.03 281959.06 305330.86 270298.49 223335.64 213981.57 
##      2511      2512      2513      2514      2515      2516      2517 
## 185128.77 209512.29 209598.95 221020.65 147205.29 167445.43 139279.09 
##      2518      2519      2520      2521      2522      2523      2524 
## 143257.14 226085.15 205169.71 197776.15 223720.30 135402.98 153239.85 
##      2525      2526      2527      2528      2529      2530      2531 
## 148474.48 145808.11 122104.43 129537.43 137673.80 131277.60 236261.86 
##      2532      2533      2534      2535      2536      2537      2538 
## 227461.77 201374.00 223944.47 266486.71 230927.70 192822.78 172897.05 
##      2539      2540      2541      2542      2543      2544      2545 
## 178696.34 184744.58 179111.75 184367.58 121985.73 134638.93 132475.37 
##      2546      2547      2548      2549      2550      2551      2552 
## 153222.74 126389.62 165887.06 148020.72 326720.23 139176.50 112765.52 
##      2553      2554      2555      2556      2557      2558      2559 
##        NA        NA 110154.35 104103.83 104532.26        NA 166938.78 
##      2560      2561      2562      2563      2564      2565      2566 
## 152552.63 148416.13 156481.27 171160.74 192477.15 165966.39 185990.77 
##      2567      2568      2569      2570      2571      2572      2573 
## 156254.52 220148.08 216710.61 139681.30 216128.05 150575.46 265422.40 
##      2574      2575      2576      2577      2578      2579      2580 
## 227732.79 128194.68        NA        NA 117344.20  89036.31        NA 
##      2581      2582      2583      2584      2585      2586      2587 
## 123455.35 115573.20 258101.40 159340.18 197596.56 168595.99 174477.35 
##      2588      2589      2590      2591      2592      2593      2594 
## 133868.38 144470.81 238404.53 222809.19 241553.03 231023.58 180634.05 
##      2595      2596      2597      2598      2599      2600      2601 
## 185199.32 347820.20 226979.91 262032.74 305769.23 202659.28 141899.57 
##      2602      2603      2604      2605      2606      2607      2608 
##  83938.60 103363.35        NA  84118.08 146840.80 172641.17 193829.81 
##      2609      2610      2611      2612      2613      2614      2615 
## 171286.78        NA 142111.97 150707.84 130361.68 127686.75 162751.36 
##      2616      2617      2618      2619      2620      2621      2622 
## 137004.65 189345.36 171004.98 204201.85 185040.71 181667.99 200587.40 
##      2623      2624      2625      2626      2627      2628      2629 
## 230294.51 305719.08 334809.73 171774.87 189184.67 403836.46 443412.45 
##      2630      2631      2632      2633      2634      2635      2636 
## 329038.34 437237.53 400300.49 287576.68 356841.24 162695.30 198811.78 
##      2637      2638      2639      2640      2641      2642      2643 
## 161752.81 251376.74 178885.73 154161.44 110557.49 181457.97 105582.25 
##      2644      2645      2646      2647      2648      2649      2650 
## 139845.61 102769.33  91974.04  98555.91 134427.01 143232.33 138250.23 
##      2651      2652      2653      2654      2655      2656      2657 
## 158605.28 401916.66 283107.84 298558.03 373045.54 303196.61 289545.23 
##      2658      2659      2660      2661      2662      2663      2664 
## 269691.20 312077.05 350334.98 320983.78 320298.12 291287.40 256412.36 
##      2665      2666      2667      2668      2669      2670      2671 
## 324934.47 284522.49 174091.51 175243.48 179125.76 258080.93 178018.10 
##      2672      2673      2674      2675      2676      2677      2678 
## 181070.35 201233.30 203500.70 162533.58 183995.42 205214.59 247568.61 
##      2679      2680      2681      2682      2683      2684      2685 
## 284232.96 287745.45 399672.16 351419.05 499000.55 325785.91 378857.79 
##      2686      2687      2688      2689      2690      2691      2692 
## 252233.64 254431.96 234972.11 194890.50 336016.93 212048.76        NA 
##      2693      2694      2695      2696      2697      2698      2699 
## 181839.41        NA 194655.02 179630.22 207777.55 195848.75 176716.09 
##      2700      2701      2702      2703      2704      2705      2706 
## 171791.26 160588.96 121306.23 145328.88 141154.89 125758.14 121170.17 
##      2707      2708      2709      2710      2711      2712      2713 
## 134491.26 131628.60        NA 126997.41 280028.97 381649.92 173303.64 
##      2714      2715      2716      2717      2718      2719      2720 
## 154073.23 176326.98 151580.48 176372.74 243920.61 168560.69 168422.29 
##      2721      2722      2723      2724      2725      2726      2727 
## 142081.45 164732.65 141188.96 129038.57 138202.55 137828.73 162365.44 
##      2728      2729      2730      2731      2732      2733      2734 
## 167896.45 162797.02 145497.92 125060.39 130516.25 166388.31 162842.78 
##      2735      2736      2737      2738      2739      2740      2741 
## 137888.65 149395.70 123720.70 139548.71 163953.10 146366.27 147057.77 
##      2742      2743      2744      2745      2746      2747      2748 
## 158915.37 152563.25 141759.35 140831.31 133692.50 149423.32 130570.97 
##      2749      2750      2751      2752      2753      2754      2755 
## 125198.00 135663.39 119426.41 200706.90 144739.49 278176.90 132416.20 
##      2756      2757      2758      2759      2760      2761      2762 
##  97539.01  89056.63  97069.75 159624.40 146246.25 143723.58 140498.97 
##      2763      2764      2765      2766      2767      2768      2769 
## 182067.32 138614.00 238391.91 137985.50  90982.14        NA 130773.04 
##      2770      2771      2772      2773      2774      2775      2776 
## 138081.36 120716.27        NA 151658.02 143697.00 140638.96 132770.53 
##      2777      2778      2779      2780      2781      2782      2783 
## 152869.52 106112.19 128341.71 102808.01 113471.78 102903.34 118579.68 
##      2784      2785      2786      2787      2788      2789      2790 
## 121852.26 139710.17  84570.30 114309.97 100004.91 170589.20        NA 
##      2791      2792      2793      2794      2795      2796      2797 
## 114764.39        NA 158898.81 114931.24 123848.16 112489.28 208457.95 
##      2798      2799      2800      2801      2802      2803      2804 
## 134792.43 115566.91        NA 112450.19 130468.80 150124.51 155581.48 
##      2805      2806      2807      2808      2809      2810      2811 
##  97817.35 102147.08 129623.83 142489.79 136363.42 135474.86 162308.74 
##      2812      2813      2814      2815      2816      2817      2818 
## 141631.35 185019.72 173009.49  99422.18 242037.04 173269.14 129013.03 
##      2819      2820      2821      2822      2823      2824      2825 
## 171481.92 151422.50 115580.89 217461.82 259155.17 210275.36 152532.00 
##      2826      2827      2828      2829      2830      2831      2832 
## 120403.18 128279.86 224955.06 187671.24 246725.19 191829.51 225333.45 
##      2833      2834      2835      2836      2837      2838      2839 
## 249499.92 213410.84 223051.52 186727.38 193625.47 158419.05 186025.46 
##      2840      2841      2842      2843      2844      2845      2846 
## 190866.73 200655.45 215958.52 153926.71 174132.10 131032.21 209286.97 
##      2847      2848      2849      2850      2851      2852      2853 
## 205036.69 181153.72 214235.70 272590.79 228130.85 245763.14 228046.42 
##      2854      2855      2856      2857      2858      2859      2860 
## 139472.52 189311.06 201766.39 184370.28 178965.88 139310.61        NA 
##      2861      2862      2863      2864      2865      2866      2867 
## 113915.96 170898.31        NA 208458.45 139176.50 131712.33 101057.51 
##      2868      2869      2870      2871      2872      2873      2874 
## 106557.80 128579.04 140496.16        NA  64012.19 105732.48 126515.21 
##      2875      2876      2877      2878      2879      2880      2881 
## 122386.39 152988.92 142512.16 182967.11 135202.71 113830.59 177126.51 
##      2882      2883      2884      2885      2886      2887      2888 
## 193544.36 198924.77 189997.51 189757.50 253853.30 105653.33 132085.86 
##      2889      2890      2891      2892      2893      2894      2895 
##        NA  96530.16 136327.19        NA        NA        NA 285325.63 
##      2896      2897      2898      2899      2900      2901      2902 
## 281455.50 217382.51 155884.07 188434.17 150538.70 227434.40 179901.16 
##      2903      2904      2905      2906      2907      2908      2909 
## 328199.76 308384.17 121966.59 187361.93 116467.58 126574.11 163472.84 
##      2910      2911      2912      2913      2914      2915      2916 
##        NA  84222.27 153157.18  83716.89        NA        NA  84976.34 
##      2917      2918      2919 
## 162989.63        NA 226993.12
# Build the output table in one pass: pair every test-set Id with its
# predicted SalePrice, then fill predictions that came back as NA with a
# fixed fallback value of 163000.
# NOTE(review): 163000 is a hard-coded constant; presumably a typical sale
# price chosen by the author -- confirm its origin.
prediction2.df <- data.frame(Id = test_set$Id, SalePrice = prediction) %>%
  mutate(SalePrice = replace_na(SalePrice, 163000))

# Write the table to CSV without the row-name column.
write.csv(
  prediction2.df,
  file = "prediction_random_forest.csv",
  row.names = FALSE
)