Clean the data: convert the JSON file into a data frame

# Chunk defaults: echo the source of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Attach packages in their original order, since attach order decides which
# package wins when two export the same name. lubridate was attached twice in
# the original; the repeated call had no effect and is dropped.
suppressWarnings(library(jsonlite))
suppressWarnings(library(lubridate))
suppressWarnings(library(caTools))
suppressWarnings(library(nnet))
suppressWarnings(library(caret))
suppressWarnings(library(statisticalModeling))
suppressWarnings(library(h2o))
suppressWarnings(library(MASS))
suppressWarnings(library(Hmisc))

# Location of the training data (machine-specific absolute path).
json_file <- "C:/Users/dannyhuang/Desktop/train.json"

# fromJSON() accepts a JSON string, a URL or a file path, so a path that does
# not exist may be handed to the parser as JSON text and fail with a
# misleading parse error. Fail early with a clear message instead.
if (!file.exists(json_file)) {
  stop("Training data not found: ", json_file, call. = FALSE)
}

# Parse the whole file; simplifyDataFrame = TRUE lets jsonlite coerce
# record-like JSON arrays into data frames where possible.
x1 <- fromJSON(json_file, simplifyDataFrame = TRUE)

# Flatten the parsed JSON fields of x1 into a single data frame.
# Scalar fields are unlisted into plain vectors; features_num and photo_num
# hold the number of entries in each element of x1$features / x1$photos.
df <- data.frame(
  bathrooms = as.numeric(unlist(x1$bathrooms)),
  bedrooms = as.numeric(unlist(x1$bedrooms)),
  building_id = as.factor(unlist(x1$building_id)),
  # Fields currently left out of the data frame, kept for reference:
  # created = as.POSIXct(unlist(x1$created)),
  # latitude = unlist(x1$latitude),
  # longitude = unlist(x1$longitude),
  price = as.numeric(unlist(x1$price)),
  # lengths() returns the length of every list element directly; it replaces
  # the hand-rolled unlist(lapply(x, length)).
  features_num = as.numeric(lengths(x1$features)),
  photo_num = as.numeric(lengths(x1$photos)),
  interest_level = as.factor(unlist(x1$interest_level))
)

Calculate the effect size (price change) of each explanatory variable

# Inspect the structure (column names and types) of the assembled data frame.
str(df)
## 'data.frame':    49352 obs. of  7 variables:
##  $ bathrooms     : num  1 1 1 1.5 1 1 2 1 0 3 ...
##  $ bedrooms      : num  1 2 2 3 0 3 3 0 1 3 ...
##  $ building_id   : Factor w/ 7585 levels "0","00005cb939f9986300d987652c933e15",..: 3941 5494 6096 2432 5690 1390 375 6313 6232 2783 ...
##  $ price         : num  2400 3800 3495 3000 2795 ...
##  $ features_num  : num  7 6 6 0 4 6 5 5 1 2 ...
##  $ photo_num     : num  12 6 6 5 4 5 7 5 4 11 ...
##  $ interest_level: Factor w/ 3 levels "high","low","medium": 3 2 3 3 2 2 2 1 2 2 ...

# Fit a linear model of price on the number of bathrooms, the number of
# bedrooms and the number of listed features.
price_model_1 <- lm(
  price ~ bathrooms + bedrooms + features_num,
  data = df
)

# Effect size of bathrooms: per the recorded output below, one additional
# bathroom is associated with a rental price increase of about 2501.19.
effect_size(price_model_1, ~ bathrooms, step = 1)
##      slope bathrooms to:bathrooms bedrooms features_num
## 1 2501.189         1            2        1            5

# Effect size of bedrooms: one additional bedroom is associated with a rental
# price increase of about 402.96.
effect_size(price_model_1, ~ bedrooms, step = 1)
##      slope bedrooms to:bedrooms bathrooms features_num
## 1 402.9566        1           2         1            5

# Effect size of features_num: one additional listed feature is associated
# with a rental price increase of about 47.99.
effect_size(price_model_1, ~ features_num, step = 1)
##     slope features_num to:features_num bathrooms bedrooms
## 1 47.9915            5               6         1        1

Calculate the price difference under a specific scenario: increase features_num by one while holding the other variables constant

# Scenario 1: 3 bedrooms, 1 bathroom, 4 listed features.
# The result is stored (and printed) so the difference below is computed from
# the model output itself rather than from numbers copied out of a printout.
scenario_1 <- evaluate_model(
  price_model_1,
  bedrooms = 3, bathrooms = 1, features_num = 4
)
scenario_1
##   bathrooms bedrooms features_num model_output
## 1         1        3            4     3818.573

# Scenario 2: identical to scenario 1 except for one additional feature.
scenario_2 <- evaluate_model(
  price_model_1,
  bedrooms = 3, bathrooms = 1, features_num = 5
)
scenario_2
##   bathrooms bedrooms features_num model_output
## 1         1        3            5     3866.565

# Predicted price change from adding one feature, other inputs held constant.
# In the recorded run above this was 3866.565 - 3818.573 = 47.992, in line
# with the features_num effect size.
price_difference <- scenario_2$model_output - scenario_1$model_output
price_difference