Clean the data: convert the JSON file into a data frame
# Set the knitr chunk default echo = TRUE so chunk source code is shown in
# the rendered document.
knitr::opts_chunk$set(echo = TRUE)
suppressWarnings(library("jsonlite"))
suppressWarnings(library("lubridate"))
suppressWarnings(library("caTools"))
suppressWarnings(library("nnet"))
suppressWarnings(library("caret"))
suppressWarnings(library("statisticalModeling"))
suppressWarnings(library(h2o))
# Location of the training data in JSON format.
# NOTE(review): this is an absolute, machine-specific path; it has to be
# edited before the script can run on another machine.
json_file <- "C:/Users/dannyhuang/Desktop/train.json"
suppressWarnings(library(lubridate))
suppressWarnings(library(MASS))
suppressWarnings(library("Hmisc"))
# Parse the training data from the JSON file named by `json_file`.
# fromJSON() treats a string that is neither an existing file nor a URL as
# literal JSON text, so a wrong path would otherwise surface as a misleading
# JSON parse error. Check for the file first and report the real problem.
if (!file.exists(json_file)) {
  stop("Training data file not found: ", json_file, call. = FALSE)
}
x1 <- fromJSON(json_file, simplifyDataFrame = TRUE)
# Flatten the parsed fields of x1 into a single data frame for modelling.
# Scalar fields are unlisted and coerced to numeric or factor; `features`
# and `photos` are reduced to the number of elements in each record.
df <- data.frame(
  bathrooms = as.numeric(unlist(x1$bathrooms)),
  bedrooms = as.numeric(unlist(x1$bedrooms)),
  building_id = as.factor(unlist(x1$building_id)),
  # Columns currently left out of the data frame:
  # created = as.POSIXct(unlist(x1$created)),
  # latitude = unlist(x1$latitude),
  # longitude = unlist(x1$longitude),
  price = as.numeric(unlist(x1$price)),
  features_num = as.numeric(lengths(x1$features)),
  photo_num = as.numeric(lengths(x1$photos)),
  interest_level = as.factor(unlist(x1$interest_level))
)
Calculate the effect size (price change) of each explanatory variable
# Print the structure of df (column types and leading values) as a sanity
# check on the conversion above.
str(df)
## 'data.frame': 49352 obs. of 7 variables:
## $ bathrooms : num 1 1 1 1.5 1 1 2 1 0 3 ...
## $ bedrooms : num 1 2 2 3 0 3 3 0 1 3 ...
## $ building_id : Factor w/ 7585 levels "0","00005cb939f9986300d987652c933e15",..: 3941 5494 6096 2432 5690 1390 375 6313 6232 2783 ...
## $ price : num 2400 3800 3495 3000 2795 ...
## $ features_num : num 7 6 6 0 4 6 5 5 1 2 ...
## $ photo_num : num 12 6 6 5 4 5 7 5 4 11 ...
## $ interest_level: Factor w/ 3 levels "high","low","medium": 3 2 3 3 2 2 2 1 2 2 ...
# Fit a linear regression of price on the number of bathrooms, bedrooms and
# features.
price_model_1 <- lm(price ~ bathrooms + bedrooms + features_num, data = df)
# Effect size of bathrooms: with the other inputs held constant, one more
# bathroom changes the model's price output by 2501.189.
effect_size(price_model_1, ~ bathrooms, step = 1)
## slope bathrooms to:bathrooms bedrooms features_num
## 1 2501.189 1 2 1 5
# Effect size of bedrooms: with the other inputs held constant, one more
# bedroom changes the model's price output by 402.9566.
effect_size(price_model_1, ~ bedrooms, step = 1)
## slope bedrooms to:bedrooms bathrooms features_num
## 1 402.9566 1 2 1 5
# Effect size of features_num: with the other inputs held constant, one more
# feature changes the model's price output by 47.9915.
effect_size(price_model_1, ~ features_num, step = 1)
## slope features_num to:features_num bathrooms bedrooms
## 1 47.9915 5 6 1 1
Calculate the price difference under a specific scenario: increase features_num by one with the other variables held constant
# Evaluate the model in two scenarios that differ only in features_num
# (4 vs 5), with bedrooms = 3 and bathrooms = 1 in both.
scenario_1 <- evaluate_model(
  price_model_1,
  bedrooms = 3, bathrooms = 1, features_num = 4
)
scenario_1 # Model output in scenario 1
## bathrooms bedrooms features_num model_output
## 1 1 3 4 3818.573
scenario_2 <- evaluate_model(
  price_model_1,
  bedrooms = 3, bathrooms = 1, features_num = 5
)
scenario_2 # Model output in scenario 2
## bathrooms bedrooms features_num model_output
## 1 1 3 5 3866.565
# Compute the difference from the model outputs themselves instead of
# retyping the printed, rounded values, so the result stays correct when the
# data or the model changes. It equals the features_num effect size above.
price_difference <- scenario_2$model_output - scenario_1$model_output
price_difference
## [1] 47.9915