suppressWarnings(library("jsonlite"))
suppressWarnings(library("lubridate"))
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
suppressWarnings(library("caret"))
## Loading required package: lattice
## Loading required package: ggplot2
suppressWarnings(library(h2o))
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## Attaching package: 'h2o'
## The following objects are masked from 'package:lubridate':
## 
##     day, hour, month, week, year
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## The following objects are masked from 'package:base':
## 
##     %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
# Location of the training listings export (JSON).
json_file <- "C:/Users/dannyhuang/Desktop/train.json"
suppressWarnings(library(lubridate))
suppressWarnings(library(MASS))

# fromJSON() accepts a JSON string, URL or file, so a path that does not
# exist is parsed as literal JSON text and fails with a cryptic lexer error.
# Fail early with a message that names the offending path instead.
if (!file.exists(json_file)) {
  stop("Training data file not found: ", json_file, call. = FALSE)
}

# Parse the whole file; each top-level field (bathrooms, price, ...) is read
# below as an element of `x1`.
x1 <- fromJSON(json_file, simplifyDataFrame = TRUE)

# Flatten every field to a plain vector and assemble one row per listing.
# `features_num` and `photo_num` are the number of entries each listing has
# in its `features` / `photos` element.
df <- data.frame(
  bathrooms = as.numeric(unlist(x1$bathrooms)),
  bedrooms = as.numeric(unlist(x1$bedrooms)),
  building_id = as.factor(unlist(x1$building_id)),
  # created = as.POSIXct(unlist(x1$created)),  # left disabled, as before
  latitude = unlist(x1$latitude),
  longitude = unlist(x1$longitude),
  price = as.numeric(unlist(x1$price)),
  features_num = as.numeric(lengths(x1$features)),
  photo_num = as.numeric(lengths(x1$photos)),
  interest_level = as.factor(unlist(x1$interest_level))
)
str(df)
## 'data.frame':    49352 obs. of  9 variables:
##  $ bathrooms     : num  1 1 1 1.5 1 1 2 1 0 3 ...
##  $ bedrooms      : num  1 2 2 3 0 3 3 0 1 3 ...
##  $ building_id   : Factor w/ 7585 levels "0","00005cb939f9986300d987652c933e15",..: 3941 5494 6096 2432 5690 1390 375 6313 6232 2783 ...
##  $ latitude      : num  40.7 40.8 40.8 40.7 40.7 ...
##  $ longitude     : num  -74 -74 -74 -73.9 -74 ...
##  $ price         : num  2400 3800 3495 3000 2795 ...
##  $ features_num  : num  7 6 6 0 4 6 5 5 1 2 ...
##  $ photo_num     : num  12 6 6 5 4 5 7 5 4 11 ...
##  $ interest_level: Factor w/ 3 levels "high","low","medium": 3 2 3 3 2 2 2 1 2 2 ...
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.3.3
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(purrr)
## Warning: package 'purrr' was built under R version 3.3.2
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:caret':
## 
##     lift
## The following object is masked from 'package:jsonlite':
## 
##     flatten
# Static Google roadmap centred on New York City with one point per listing,
# coloured by interest level.
ggmap(
  get_googlemap(center = "new york city", zoom = 10, maptype = "roadmap"),
  extent = "device"
) +
  geom_point(
    data = df,
    mapping = aes(x = longitude, y = latitude, color = interest_level),
    alpha = 0.7
  )
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=new+york+city&zoom=10&size=640x640&scale=2&maptype=roadmap&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=new%20york%20city&sensor=false
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
## Warning: Removed 45 rows containing missing values (geom_point).

# Mapbox access token used by plot_mapbox(). A token already present in the
# environment (e.g. from ~/.Renviron) takes precedence; the embedded value is
# only a fallback so the script keeps running unchanged.
# NOTE(review): this token is committed in source -- consider rotating it and
# supplying it solely through the environment.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv(MAPBOX_TOKEN = "pk.eyJ1IjoiZGFubnlodWFuZyIsImEiOiJjajB2MHhxbXEwMzdpMzhscmcwbHZ0YXdtIn0.tvgmqm8TUM4gvdNGJAvNLA")
}

# Interactive map of the listings: one trace per interest level, with the
# listing price shown on hover.
p <- df %>%
  plot_mapbox(
    lat = ~latitude, lon = ~longitude,
    split = ~interest_level, size = 2,
    # "markers" is a valid draw mode; "scattermapbox" is a trace type, which
    # plot_mapbox() already sets.
    mode = "markers",
    # hoverinfo only accepts lon/lat/text/name flags (or all/none/skip), so
    # the price has to be routed through `text` to appear in the tooltip.
    text = ~price, hoverinfo = "text"
  ) %>%
  layout(
    font = list(color = "white"),
    legend = list(orientation = "h", font = list(size = 8))
  )

p