#Author: Joel Trematore
#lOAD LIBRARIES
library(tidyverse) 
library(caret)
library(dplyr)
library(corrplot)
#library(lme4)
#library(lmerTest)
#library(effects)
for(i in 1:length(data_files)) {
  # Head of for-loop
  assign(paste0("data_", i),
  # Read and store data frames from the file location with the program file and data files
  read.csv(paste0("C:/Users/joeyj/Desktop/CAPSTONE/groupcsv/good_code/markdown/",data_files[i])))
}
#CREATE A LIST OF FRAMES TO READ
list_of_frames <- list(data_1,data_2,data_3,data_4,data_5,
                       data_6,data_7,data_8,data_9,data_10)
#CREATE EMPTY DATAFRAME TO FILL
info_frame <- data.frame()
info_frame_two <-data.frame()

#CREATE LIST TO CONTAIN MODEL INFO
list_of_models <- list()
list_of_model_summary <- list()
######################################################################## PART 1
# THIS SECTION USES ALL FEATURES FROM THE FRAME AND CREATES LINEAR MODELS FROM EACH
# ZIP CODE FRAME
i = 1

#CREATE ANOTHER FOR LOOP SO WE CAN CREATE THREE DIFFERENT MODELS
for (frame in list_of_frames){
        set.seed(123)
  
        partition <- createDataPartition(frame$Listing.price, p = 0.8, list = FALSE)
        #SPLIT DATA INTO TRAIN AND TEST FRAMES
        
        
        train <- frame[partition,]
        test <- frame[-partition,]
        
        #CREATE FIRST MODEL WITH ALL FEATURES
        model_one_train <- lm(as.numeric(Listing.price) ~ Year.built+Type+ Bedrooms+Total.baths+
                                X1.2.Baths +  X..of.garage.spaces+
                                X..of.carports+ Lot.acres+ Lot.sqft+ Taxes+
                                Main.house.sqft+ X..of.Fireplaces+Pool+Status
                              ,data = train)
        #CREATE MODEL SUMMARY
        model_one_train_summary <- summary(model_one_train)
        
        #SAVE THE PREDICTED VALUES TO THE TEST FRAME
        test$predicted_p <- predict(model_one_train,test)
        
        list_of_model_summary <-    append(list_of_model_summary,list(model_one_train_summary))
        
        list_of_models <- append(list_of_models,list(model_one_train))
        
        #SAVE PREDICTIONS INTO TEST FRAME
        test$predicted_p <- predict(model_one_train,test)
        
        #TAKE THE MEAN AND SQUARE THE VALUES FOR THE MSE
        mse <- mean((test$Listing.price - test$predicted_p)^2)
        off_by <- sqrt(mse)
        
        r_squared <- model_one_train_summary$r.squared*100
        
        zipcode <- frame$Zip[1]
        
        #ROW/COLUMN
        info_frame[i,1] <- mse
        info_frame[i,2] <- off_by
        info_frame[i,3] <- r_squared
        info_frame[i,4] <- zipcode
        info_frame[i,5] <- nrow(frame)
        info_frame[i,6] <- "Non AIC"

        
        i <- i +1
          
}#outer for loop
##  [1] "lm" "lm" "lm" "lm" "lm" "lm" "lm" "lm" "lm" "lm"
######################################################################## PART 2
#MODEL 1
#THIS SECTION USES AIC FUNCTION TO CHOOSE FEATURES

#CREATE PARTITION AND SET SEED
set.seed(123)
partition_two <- createDataPartition(data_2$Listing.price, p = 0.8, list = FALSE)
#SPLIT INTO TEST AND TRAIN
train_two <- data_2[partition_two,]
test_two <- data_2[-partition_two,]

#CREATE MODEL WITH FIRST DATA SET
model_set_one <- lm(as.numeric(Listing.price) ~ Year.built+Type+ Bedrooms+Total.baths+
                        X1.2.Baths +  X..of.garage.spaces+
                        X..of.carports+ Lot.acres+ Lot.sqft+ Taxes+
                        Main.house.sqft+ X..of.Fireplaces+Pool+Status
                      ,data = data_2)

#USE STEP FUNCTION TO SEE THE NEW MODEL WE GET 
step_model_final_one <- step(model_set_one)
## Start:  AIC=3100.67
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X1.2.Baths + X..of.garage.spaces + X..of.carports + Lot.acres + 
##     Lot.sqft + Taxes + Main.house.sqft + X..of.Fireplaces + Pool + 
##     Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - X..of.carports       1 1.3583e+08 5.4602e+10 3099.1
## - Taxes                1 1.6457e+08 5.4631e+10 3099.1
## - X..of.garage.spaces  1 1.7407e+08 5.4640e+10 3099.2
## - Bedrooms             1 2.0408e+08 5.4670e+10 3099.3
## - X1.2.Baths           1 2.6811e+08 5.4734e+10 3099.4
## - Lot.sqft             1 6.0001e+08 5.5066e+10 3100.4
## - Lot.acres            1 6.7326e+08 5.5139e+10 3100.6
## <none>                              5.4466e+10 3100.7
## - Total.baths          1 1.4723e+09 5.5938e+10 3102.8
## - X..of.Fireplaces     1 1.5649e+09 5.6031e+10 3103.1
## - Pool                 1 4.4797e+09 5.8946e+10 3111.0
## - Type                 1 9.2542e+09 6.3720e+10 3123.2
## - Status               2 1.2170e+10 6.6636e+10 3128.1
## - Year.built           1 1.1409e+10 6.5875e+10 3128.3
## - Main.house.sqft      1 2.7833e+10 8.2299e+10 3163.1
## 
## Step:  AIC=3099.06
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X1.2.Baths + X..of.garage.spaces + Lot.acres + Lot.sqft + 
##     Taxes + Main.house.sqft + X..of.Fireplaces + Pool + Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - X..of.garage.spaces  1 1.0179e+08 5.4704e+10 3097.4
## - Bedrooms             1 1.6282e+08 5.4765e+10 3097.5
## - Taxes                1 2.0539e+08 5.4807e+10 3097.6
## - X1.2.Baths           1 2.5774e+08 5.4860e+10 3097.8
## - Lot.sqft             1 5.7002e+08 5.5172e+10 3098.7
## - Lot.acres            1 6.4391e+08 5.5246e+10 3098.9
## <none>                              5.4602e+10 3099.1
## - Total.baths          1 1.4347e+09 5.6037e+10 3101.1
## - X..of.Fireplaces     1 1.4359e+09 5.6038e+10 3101.1
## - Pool                 1 4.3559e+09 5.8958e+10 3109.0
## - Type                 1 1.0414e+10 6.5016e+10 3124.3
## - Status               2 1.2156e+10 6.6758e+10 3126.4
## - Year.built           1 1.1315e+10 6.5917e+10 3126.4
## - Main.house.sqft      1 2.8495e+10 8.3097e+10 3162.6
## 
## Step:  AIC=3097.35
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X1.2.Baths + Lot.acres + Lot.sqft + Taxes + Main.house.sqft + 
##     X..of.Fireplaces + Pool + Status
## 
##                    Df  Sum of Sq        RSS    AIC
## - Bedrooms          1 1.9425e+08 5.4898e+10 3095.9
## - Taxes             1 2.0627e+08 5.4910e+10 3095.9
## - X1.2.Baths        1 3.1787e+08 5.5022e+10 3096.3
## - Lot.sqft          1 6.0055e+08 5.5304e+10 3097.1
## - Lot.acres         1 6.7685e+08 5.5381e+10 3097.3
## <none>                           5.4704e+10 3097.4
## - X..of.Fireplaces  1 1.4365e+09 5.6140e+10 3099.4
## - Total.baths       1 1.5899e+09 5.6294e+10 3099.8
## - Pool              1 4.2846e+09 5.8988e+10 3107.1
## - Status            2 1.2056e+10 6.6760e+10 3124.4
## - Type              1 1.2252e+10 6.6956e+10 3126.9
## - Year.built        1 1.8273e+10 7.2977e+10 3140.3
## - Main.house.sqft   1 2.9580e+10 8.4284e+10 3162.8
## 
## Step:  AIC=3095.91
## as.numeric(Listing.price) ~ Year.built + Type + Total.baths + 
##     X1.2.Baths + Lot.acres + Lot.sqft + Taxes + Main.house.sqft + 
##     X..of.Fireplaces + Pool + Status
## 
##                    Df  Sum of Sq        RSS    AIC
## - X1.2.Baths        1 2.4810e+08 5.5146e+10 3094.6
## - Lot.sqft          1 5.2078e+08 5.5419e+10 3095.4
## - Lot.acres         1 5.9345e+08 5.5491e+10 3095.6
## - Taxes             1 5.9627e+08 5.5494e+10 3095.6
## <none>                           5.4898e+10 3095.9
## - X..of.Fireplaces  1 1.2829e+09 5.6181e+10 3097.5
## - Total.baths       1 1.6165e+09 5.6515e+10 3098.4
## - Pool              1 4.0911e+09 5.8989e+10 3105.1
## - Status            2 1.2176e+10 6.7074e+10 3123.2
## - Type              1 1.2418e+10 6.7316e+10 3125.7
## - Year.built        1 1.8166e+10 7.3064e+10 3138.5
## - Main.house.sqft   1 5.0900e+10 1.0580e+11 3196.3
## 
## Step:  AIC=3094.61
## as.numeric(Listing.price) ~ Year.built + Type + Total.baths + 
##     Lot.acres + Lot.sqft + Taxes + Main.house.sqft + X..of.Fireplaces + 
##     Pool + Status
## 
##                    Df  Sum of Sq        RSS    AIC
## - Lot.sqft          1 5.7897e+08 5.5725e+10 3094.2
## - Taxes             1 5.8917e+08 5.5735e+10 3094.3
## - Lot.acres         1 6.5579e+08 5.5802e+10 3094.5
## <none>                           5.5146e+10 3094.6
## - X..of.Fireplaces  1 1.3567e+09 5.6503e+10 3096.4
## - Total.baths       1 1.7079e+09 5.6854e+10 3097.4
## - Pool              1 4.2314e+09 5.9378e+10 3104.1
## - Status            2 1.2018e+10 6.7165e+10 3121.4
## - Type              1 1.2246e+10 6.7392e+10 3123.9
## - Year.built        1 1.8474e+10 7.3620e+10 3137.7
## - Main.house.sqft   1 6.0671e+10 1.1582e+11 3208.4
## 
## Step:  AIC=3094.24
## as.numeric(Listing.price) ~ Year.built + Type + Total.baths + 
##     Lot.acres + Taxes + Main.house.sqft + X..of.Fireplaces + 
##     Pool + Status
## 
##                    Df  Sum of Sq        RSS    AIC
## <none>                           5.5725e+10 3094.2
## - Taxes             1 9.4505e+08 5.6670e+10 3094.9
## - X..of.Fireplaces  1 1.3425e+09 5.7068e+10 3096.0
## - Total.baths       1 1.7774e+09 5.7503e+10 3097.1
## - Pool              1 3.6785e+09 5.9404e+10 3102.2
## - Type              1 1.2044e+10 6.7769e+10 3122.8
## - Status            2 1.3242e+10 6.8967e+10 3123.5
## - Year.built        1 1.8447e+10 7.4172e+10 3136.8
## - Lot.acres         1 2.9126e+10 8.4851e+10 3157.8
## - Main.house.sqft   1 6.1907e+10 1.1763e+11 3208.8
#CREATE MODEL GIVEN BY STEP FUNCTION

step_model_one <- lm(as.numeric(Listing.price)~ X..of.garage.spaces+
                   Status +
                   Pool +
                   Taxes +
                   Total.baths+
                   Type +
                   Bedrooms +
                   Year.built +
                   Lot.sqft +
                   Main.house.sqft,data = train_two)

step_model_summary_one <- summary(step_model_one)

test_two$predicted_p  <- predict(step_model_one,test_two)

step_model_Listing_price_mse <- mean((test_two$Listing.price - test_two$predicted_p)^2)

info_frame_two[1,1] <- step_model_Listing_price_mse
info_frame_two[1,2] <- sqrt(step_model_Listing_price_mse)
info_frame_two[1,3] <- step_model_summary_one$r.squared*100
info_frame_two[1,4] <- data_2$Zip[1]
info_frame_two[1,5] <- nrow(data_2)
info_frame_two[1,6] <- "AIC"
######################################################################## PART 2
#MODEL 2
set.seed(123)
partition_three <- createDataPartition(data_3$Listing.price, p = 0.8, list = FALSE)
#SPLIT INTO TEST AND TRAIN
train_three <- data_3[partition_two,]
test_three <- data_3[-partition_two,]

#CREATE MODEL WITH ORIGINAL DATAFRAME
model_set_two <- lm(as.numeric(Listing.price) ~ Year.built+Type+ Bedrooms+Total.baths+
                        X1.2.Baths +  X..of.garage.spaces+
                        X..of.carports+ Lot.acres+ Lot.sqft+ Taxes+
                        Main.house.sqft+ X..of.Fireplaces+Pool+Status
                      ,data = data_3)

#USE STEP FUNCTION TO SEE THE NEW MODEL WE GET 
step_model_final_two <- step(model_set_two)
## Start:  AIC=4064.88
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X1.2.Baths + X..of.garage.spaces + X..of.carports + Lot.acres + 
##     Lot.sqft + Taxes + Main.house.sqft + X..of.Fireplaces + Pool + 
##     Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - X1.2.Baths           1 5.8226e+07 4.3449e+11 4062.9
## - Total.baths          1 8.6883e+08 4.3530e+11 4063.3
## - Lot.sqft             1 1.4819e+09 4.3591e+11 4063.5
## - Taxes                1 1.4906e+09 4.3592e+11 4063.5
## - Lot.acres            1 2.3754e+09 4.3681e+11 4063.9
## <none>                              4.3443e+11 4064.9
## - X..of.carports       1 5.4142e+09 4.3985e+11 4065.2
## - Year.built           1 7.3797e+09 4.4181e+11 4066.0
## - Status               2 1.8812e+10 4.5324e+11 4068.8
## - Type                 1 1.4411e+10 4.4884e+11 4069.0
## - X..of.Fireplaces     1 1.6814e+10 4.5125e+11 4070.0
## - Bedrooms             1 2.0159e+10 4.5459e+11 4071.4
## - Pool                 1 6.2881e+10 4.9731e+11 4088.2
## - X..of.garage.spaces  1 9.4229e+10 5.2866e+11 4099.6
## - Main.house.sqft      1 4.0534e+11 8.3978e+11 4186.1
## 
## Step:  AIC=4062.9
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X..of.garage.spaces + X..of.carports + Lot.acres + Lot.sqft + 
##     Taxes + Main.house.sqft + X..of.Fireplaces + Pool + Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - Taxes                1 1.5215e+09 4.3601e+11 4061.6
## - Lot.sqft             1 1.6956e+09 4.3619e+11 4061.6
## - Total.baths          1 2.6158e+09 4.3711e+11 4062.0
## - Lot.acres            1 2.6743e+09 4.3716e+11 4062.1
## <none>                              4.3449e+11 4062.9
## - X..of.carports       1 5.5346e+09 4.4002e+11 4063.3
## - Year.built           1 7.3215e+09 4.4181e+11 4064.0
## - Status               2 1.8825e+10 4.5332e+11 4066.8
## - Type                 1 1.4353e+10 4.4884e+11 4067.0
## - X..of.Fireplaces     1 1.6877e+10 4.5137e+11 4068.0
## - Bedrooms             1 2.3343e+10 4.5783e+11 4070.7
## - Pool                 1 6.2830e+10 4.9732e+11 4086.2
## - X..of.garage.spaces  1 1.0721e+11 5.4170e+11 4102.1
## - Main.house.sqft      1 4.0750e+11 8.4199e+11 4184.6
## 
## Step:  AIC=4061.56
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X..of.garage.spaces + X..of.carports + Lot.acres + Lot.sqft + 
##     Main.house.sqft + X..of.Fireplaces + Pool + Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - Lot.sqft             1 1.0808e+09 4.3709e+11 4060.0
## - Lot.acres            1 1.9003e+09 4.3791e+11 4060.4
## - Total.baths          1 3.0552e+09 4.3907e+11 4060.9
## <none>                              4.3601e+11 4061.6
## - X..of.carports       1 6.7000e+09 4.4271e+11 4062.4
## - Type                 1 1.2968e+10 4.4898e+11 4065.0
## - X..of.Fireplaces     1 1.5817e+10 4.5183e+11 4066.2
## - Status               2 2.2617e+10 4.5863e+11 4067.0
## - Year.built           1 2.1246e+10 4.5726e+11 4068.5
## - Bedrooms             1 2.5299e+10 4.6131e+11 4070.1
## - Pool                 1 6.1677e+10 4.9769e+11 4084.3
## - X..of.garage.spaces  1 1.0989e+11 5.4590e+11 4101.6
## - Main.house.sqft      1 4.1188e+11 8.4790e+11 4183.9
## 
## Step:  AIC=4060.02
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X..of.garage.spaces + X..of.carports + Lot.acres + Main.house.sqft + 
##     X..of.Fireplaces + Pool + Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - Total.baths          1 2.9553e+09 4.4005e+11 4059.3
## <none>                              4.3709e+11 4060.0
## - X..of.carports       1 6.8819e+09 4.4397e+11 4060.9
## - Type                 1 1.2845e+10 4.4994e+11 4063.4
## - Status               2 2.3106e+10 4.6020e+11 4065.7
## - X..of.Fireplaces     1 1.8327e+10 4.5542e+11 4065.7
## - Year.built           1 2.4203e+10 4.6130e+11 4068.1
## - Bedrooms             1 2.5700e+10 4.6279e+11 4068.7
## - Pool                 1 6.0737e+10 4.9783e+11 4082.4
## - X..of.garage.spaces  1 1.0884e+11 5.4593e+11 4099.6
## - Main.house.sqft      1 4.1458e+11 8.5167e+11 4182.8
## - Lot.acres            1 4.3186e+11 8.6895e+11 4186.5
## 
## Step:  AIC=4059.28
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + X..of.garage.spaces + 
##     X..of.carports + Lot.acres + Main.house.sqft + X..of.Fireplaces + 
##     Pool + Status
## 
##                       Df  Sum of Sq        RSS    AIC
## <none>                              4.4005e+11 4059.3
## - X..of.carports       1 6.4588e+09 4.4651e+11 4060.0
## - Type                 1 1.1549e+10 4.5160e+11 4062.1
## - Status               2 2.4307e+10 4.6435e+11 4065.3
## - X..of.Fireplaces     1 1.9517e+10 4.5956e+11 4065.4
## - Year.built           1 2.4584e+10 4.6463e+11 4067.4
## - Bedrooms             1 2.9081e+10 4.6913e+11 4069.2
## - Pool                 1 6.4197e+10 5.0424e+11 4082.7
## - X..of.garage.spaces  1 1.0669e+11 5.4674e+11 4097.9
## - Lot.acres            1 4.3471e+11 8.7475e+11 4185.8
## - Main.house.sqft      1 4.9839e+11 9.3844e+11 4198.9
#CREATE MODEL GIVEN BY STEP FUNCTION

step_model_two <- lm(as.numeric(Listing.price)~ X..of.Fireplaces+
                   Status +
                   Pool +
                   Taxes +
                   Total.baths+
                   Type +
                   Bedrooms +
                   Year.built +
                   Lot.acres +
                   Main.house.sqft,data = train_three)

step_model_summary_two <- summary(step_model_two)

test_three$predicted_p  <- predict(step_model_two,test_three)

step_model_Listing_price_mse <- mean((test_three$Listing.price - test_three$predicted_p)^2)

info_frame_two[2,1] <- step_model_Listing_price_mse
info_frame_two[2,2] <- sqrt(step_model_Listing_price_mse)
info_frame_two[2,3] <- step_model_summary_two$r.squared*100
info_frame_two[2,4] <- data_3$Zip[1]
info_frame_two[2,5] <- nrow(data_3)
info_frame_two[2,6] <- "AIC"
######################################################################## PART 2
# MODEL 3
set.seed(123)
partition_four <- createDataPartition(data_4$Listing.price, p = 0.8, list = FALSE)
#SPLIT INTO TEST AND TRAIN
train_four <- data_4[partition_two,]
test_four <- data_4[-partition_two,]

#CREATE MODEL WITH ORIGINAL DATAFRAME
model_set_three <- lm(as.numeric(Listing.price) ~ Year.built+Type+ Bedrooms+Total.baths+
                        X1.2.Baths +  X..of.garage.spaces+
                        X..of.carports+ Lot.acres+ Lot.sqft+ Taxes+
                        Main.house.sqft+ X..of.Fireplaces+Pool+Status
                      ,data = data_4)

#USE STEP FUNCTION TO SEE THE NEW MODEL WE GET 
step_model_final_three <- step(model_set_three)
## Start:  AIC=4857.7
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X1.2.Baths + X..of.garage.spaces + X..of.carports + Lot.acres + 
##     Lot.sqft + Taxes + Main.house.sqft + X..of.Fireplaces + Pool + 
##     Status
## 
##                       Df  Sum of Sq        RSS    AIC
## - Status               2 1.6680e+09 1.6312e+12 4853.9
## - X1.2.Baths           1 4.1591e+08 1.6300e+12 4855.8
## - Total.baths          1 9.7112e+08 1.6306e+12 4855.8
## - Taxes                1 5.4957e+09 1.6351e+12 4856.4
## <none>                              1.6296e+12 4857.7
## - Lot.acres            1 2.8093e+10 1.6577e+12 4859.3
## - Lot.sqft             1 3.0421e+10 1.6600e+12 4859.6
## - X..of.garage.spaces  1 3.4494e+10 1.6641e+12 4860.1
## - Bedrooms             1 3.7124e+10 1.6667e+12 4860.5
## - X..of.carports       1 4.6953e+10 1.6765e+12 4861.7
## - Pool                 1 1.0458e+11 1.7342e+12 4868.9
## - Type                 1 1.0941e+11 1.7390e+12 4869.5
## - X..of.Fireplaces     1 1.3004e+11 1.7596e+12 4872.0
## - Year.built           1 2.2338e+11 1.8530e+12 4882.9
## - Main.house.sqft      1 4.0092e+11 2.0305e+12 4902.3
## 
## Step:  AIC=4853.92
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X1.2.Baths + X..of.garage.spaces + X..of.carports + Lot.acres + 
##     Lot.sqft + Taxes + Main.house.sqft + X..of.Fireplaces + Pool
## 
##                       Df  Sum of Sq        RSS    AIC
## - X1.2.Baths           1 4.7160e+08 1.6317e+12 4852.0
## - Total.baths          1 7.8025e+08 1.6320e+12 4852.0
## - Taxes                1 7.1318e+09 1.6384e+12 4852.8
## <none>                              1.6312e+12 4853.9
## - Lot.acres            1 2.8238e+10 1.6595e+12 4855.6
## - Lot.sqft             1 3.0573e+10 1.6618e+12 4855.9
## - X..of.garage.spaces  1 3.3837e+10 1.6651e+12 4856.3
## - Bedrooms             1 4.0543e+10 1.6718e+12 4857.1
## - X..of.carports       1 4.7968e+10 1.6792e+12 4858.1
## - Pool                 1 1.0310e+11 1.7344e+12 4864.9
## - Type                 1 1.0957e+11 1.7408e+12 4865.7
## - X..of.Fireplaces     1 1.3045e+11 1.7617e+12 4868.2
## - Year.built           1 2.3734e+11 1.8686e+12 4880.7
## - Main.house.sqft      1 4.1035e+11 2.0416e+12 4899.5
## 
## Step:  AIC=4851.98
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + Total.baths + 
##     X..of.garage.spaces + X..of.carports + Lot.acres + Lot.sqft + 
##     Taxes + Main.house.sqft + X..of.Fireplaces + Pool
## 
##                       Df  Sum of Sq        RSS    AIC
## - Total.baths          1 2.6798e+09 1.6344e+12 4850.3
## - Taxes                1 6.7869e+09 1.6385e+12 4850.9
## <none>                              1.6317e+12 4852.0
## - Lot.acres            1 2.8121e+10 1.6598e+12 4853.6
## - Lot.sqft             1 3.0446e+10 1.6622e+12 4853.9
## - X..of.garage.spaces  1 3.5015e+10 1.6667e+12 4854.5
## - Bedrooms             1 4.4818e+10 1.6765e+12 4855.7
## - X..of.carports       1 4.7497e+10 1.6792e+12 4856.1
## - Pool                 1 1.0313e+11 1.7348e+12 4863.0
## - Type                 1 1.0930e+11 1.7410e+12 4863.7
## - X..of.Fireplaces     1 1.3057e+11 1.7623e+12 4866.3
## - Year.built           1 2.4463e+11 1.8764e+12 4879.6
## - Main.house.sqft      1 4.0989e+11 2.0416e+12 4897.5
## 
## Step:  AIC=4850.33
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + X..of.garage.spaces + 
##     X..of.carports + Lot.acres + Lot.sqft + Taxes + Main.house.sqft + 
##     X..of.Fireplaces + Pool
## 
##                       Df  Sum of Sq        RSS    AIC
## - Taxes                1 8.5227e+09 1.6429e+12 4849.4
## <none>                              1.6344e+12 4850.3
## - Lot.acres            1 2.9379e+10 1.6638e+12 4852.1
## - Lot.sqft             1 3.1751e+10 1.6662e+12 4852.4
## - X..of.garage.spaces  1 3.2796e+10 1.6672e+12 4852.5
## - Bedrooms             1 4.2678e+10 1.6771e+12 4853.8
## - X..of.carports       1 4.7548e+10 1.6819e+12 4854.4
## - Pool                 1 1.0299e+11 1.7374e+12 4861.3
## - Type                 1 1.1224e+11 1.7466e+12 4862.4
## - X..of.Fireplaces     1 1.2959e+11 1.7640e+12 4864.5
## - Year.built           1 2.4878e+11 1.8832e+12 4878.4
## - Main.house.sqft      1 7.3517e+11 2.3696e+12 4927.1
## 
## Step:  AIC=4849.43
## as.numeric(Listing.price) ~ Year.built + Type + Bedrooms + X..of.garage.spaces + 
##     X..of.carports + Lot.acres + Lot.sqft + Main.house.sqft + 
##     X..of.Fireplaces + Pool
## 
##                       Df  Sum of Sq        RSS    AIC
## <none>                              1.6429e+12 4849.4
## - Lot.acres            1 2.7623e+10 1.6705e+12 4851.0
## - X..of.garage.spaces  1 2.9674e+10 1.6726e+12 4851.2
## - Lot.sqft             1 2.9938e+10 1.6729e+12 4851.3
## - Bedrooms             1 4.2224e+10 1.6851e+12 4852.8
## - X..of.carports       1 4.5178e+10 1.6881e+12 4853.2
## - Pool                 1 1.1784e+11 1.7608e+12 4862.1
## - Type                 1 1.3220e+11 1.7751e+12 4863.8
## - X..of.Fireplaces     1 1.4572e+11 1.7886e+12 4865.4
## - Year.built           1 2.4102e+11 1.8839e+12 4876.5
## - Main.house.sqft      1 7.4314e+11 2.3861e+12 4926.5
#CREATE MODEL GIVEN BY STEP FUNCTION

step_model_three <- lm(as.numeric(Listing.price)~ X..of.Fireplaces+
                   X..of.carports+
                   X..of.garage.spaces+
                   Status +
                   Pool +
                   Taxes +
                   Total.baths+
                   Type +
                   Bedrooms +
                   Year.built +
                   Lot.acres +
                   Main.house.sqft,data = train_four)

step_model_summary_three <- summary(step_model_three)

test_four$predicted_p  <- predict(step_model_three,test_four)

step_model_Listing_price_mse <- mean((test_four$Listing.price - test_four$predicted_p)^2)

info_frame_two[3,1] <- step_model_Listing_price_mse
info_frame_two[3,2] <- sqrt(step_model_Listing_price_mse)
info_frame_two[3,3] <- step_model_summary_three$r.squared*100
info_frame_two[3,4] <- data_4$Zip[1]
info_frame_two[3,5] <- nrow(data_4)
info_frame_two[3,6] <- "AIC"
#NAME COLUMNS AND PLACE MODEL DATA INTO FRAME
names(info_frame_two) <- names
head(info_frame_two)
##           MSE      RMSE      R^2   ZIP ROWS MODEL TYPE
## 1   254208859  15943.93 84.40917 85757  156        AIC
## 2  3695546028  60791.00 81.32775 85629  187        AIC
## 3 26517743603 162842.70 85.29671 85641  212        AIC
info_frame
##             MSE      RMSE      R^2   ZIP ROWS MODEL TYPE
## 1    3249159471  57001.40 89.42592 85614  176    Non AIC
## 2     279512849  16718.64 85.26158 85757  156    Non AIC
## 3    2630064805  51284.16 86.07035 85629  187    Non AIC
## 4    5942036148  77084.60 78.61366 85641  212    Non AIC
## 5    4676032959  68381.52 81.45764 85653  252    Non AIC
## 6  203242275587 450824.00 85.18024 85658  132    Non AIC
## 7    1691652065  41129.70 93.95959 85710  122    Non AIC
## 8   16307548948 127701.01 88.02051 85743  125    Non AIC
## 9    1523734614  39035.04 92.75811 85747  145    Non AIC
## 10  91147167557 301905.89 86.66333 85755  132    Non AIC
#Author: Haley Bejarano

library(knitr)
library(tidyverse)
library(ggplot2)
library(ggthemes)

#This imports the data
RE_data <- read_csv("Housing_Data_Clean.csv")
# 1 - Scatter Plots for the listing price in the zip code it resides in
price_zip_type_plot <- ggplot(RE_data, aes(x = Zip, y = Listing.price/1000, 
                                           fill = Housing.type)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of House Listing Price in each Zipcode") +
  xlab("Zip Code") + ylab("Current Lising Price (in thousands)") +
  xlim(85600,85760) + facet_wrap(~Housing.type, scales = "free") 
price_zip_type_plot

#reorder?
# 2 - Scatter Plots for the listing price for the year it was built in
price_year_type_plot <- ggplot(RE_data, aes(x = Year.built, y = Listing.price/1000, 
                                            color = Housing.type)) + 
  geom_point(alpha = .5) +
  ggtitle("Average of House Listing Price in each Year when House was Built") + 
  xlab("Year Housing was Built") + ylab("Current Lising Price (in thousands)") +
  xlim(1900,2024)
price_year_type_plot

# Turn into scatter lot
# 3 - Scatter Plots for the tax amounts in each school district
price_school_type_plot <- ggplot(RE_data, aes(x = School.district, y = Taxes,
                                              fill = Housing.type)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of Housing Taxes in each School District") +
  xlab("School District") + ylab("Current Lising Price (in thousands)") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_colorblind()
price_school_type_plot

#facet_wrap & reorder
# 4 - These bar graphs see if pools are present pools by listing price by 
# housing type
pool_price_type_plot <- RE_data %>%
  ggplot(aes(x = Housing.type, y = Listing.price, fill = Pool)) +
  geom_bar(stat = "summary", fun = "mean", position = "dodge") +
  ggtitle("Average of Lisitng Price with Pools for Different Housing Types") +
  xlab("Pool on Property") + ylab("Lising Price (in thousands)")
pool_price_type_plot

# 5 - Bar Plots for the listing price in the housing style it resides in
style_price_style_plot <- ggplot(RE_data, aes(x = House.style, y = Listing.price/1000, 
                                           fill = Housing.type)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of House Listing Price by Style of Home Design") +
  xlab("Zip Code") + ylab("Current Lising Price (in thousands)") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~Housing.type) + scale_fill_colorblind()
style_price_style_plot

# 6 - Scatter plot for the house square footage for the year it was built in
sqft_year_type_plot <- ggplot(RE_data, aes(x = Year.built, y = Main.house.sqft, 
                                            color = Housing.type)) + 
  geom_point(alpha = .5) +
  ggtitle("Average of House Footage in each Year when House was Built") + 
  xlab("Year Built") + ylab("Main House Square Footage") + 
  xlim(1870,2024)
sqft_year_type_plot

# 7 - Bar Plots for the listing price by the number of bedrooms
price_bedroom_type_plot <- ggplot(RE_data, aes(x = Bedrooms, y = Listing.price/1000, 
                                            fill = Housing.type)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of House Listing Price by Number of Bedrooms") + 
  xlab("Number of Bedrooms") + ylab("Current Lising Price (in thousands)") + 
  scale_fill_colorblind()
price_bedroom_type_plot

# 8 - Bar Plots for the listing price by the number of total baths
price_bath_type_plot <- ggplot(RE_data, aes(x = Total.baths, y = Listing.price/1000, 
                                            fill = Housing.type)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of House Listing Price by Total Number of Baths") + 
  xlab("Total Number of Baths") + ylab("Current Lising Price (in thousands)") +
  scale_fill_colorblind()
price_bath_type_plot

# 9 - Bar Plots for the housing taxes in the housing style it 
# resides in
style_year_type_plot <- ggplot(RE_data, aes(x = reorder(House.style, Taxes),
                                            y = Taxes, fill = Housing.type)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of House Taxes by Style of Home Design") +
  xlab("Housing Style") + ylab("House taxes") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~Housing.type) + scale_fill_colorblind()
style_year_type_plot

# 10 - Bar Plots for the listing price of the house in the housing style it 
# resides in
zip_price_style_plot <- ggplot(RE_data, aes(x = Zip, y = Listing.price/1000, 
                                             fill = House.style)) + 
  geom_bar(stat = "summary", fun = "mean") +
  ggtitle("Average of House Listing Price by Style of Home Design") +
  xlab("Zip Code") + ylab("Current Listing Price (in thousands)") +
  xlim(85600,85760) + facet_wrap(~House.style, scales = "free")
zip_price_style_plot