# Load the cleaned Airbnb user table from the project data folder.
# NOTE(review): the working directory is a hard-coded absolute path from one
# machine, and its last component looks like a file name; confirm it really is
# a directory before running this anywhere else.
setwd("C:/Users/linye/Desktop/Tandon2016Fall/MachineLearning/project1/train_users_2.csv")
# The first CSV column supplies the row names. stringsAsFactors is spelled out
# because its default switched to FALSE in R 4.0.0: with it set, text columns
# are read as factors on every R version instead of as character vectors,
# which tree-based fitters such as gbm.fit() do not accept as predictors.
airbnb.simple <- read.csv("cleaned_simple.csv",
                          row.names = 1,
                          stringsAsFactors = TRUE)

Randomly select half of the rows as training data

# Split the data 50/50 into training and test sets and build a binary target.
# The fixed seed makes the random split reproducible.
set.seed(3)
# seq_len() avoids the 1:0 pitfall of 1:nrow(), and floor() makes the
# half-size an explicit whole number when the row count is odd (sample()
# would otherwise truncate the fractional size implicitly).
train <- sample(seq_len(nrow(airbnb.simple)), floor(nrow(airbnb.simple) / 2))

# Binary target: 0 when the destination is "US", 1 for every other value.
# This overwrites the column in place, so re-running this line without
# reloading the data would turn every label into 1.
airbnb.simple$country_destination <- ifelse(airbnb.simple$country_destination == "US", 0, 1)

# Training predictors (target column removed) and the matching labels.
airbnb.simple.train <- subset(airbnb.simple[train, ], select = -country_destination)
destination.train <- airbnb.simple$country_destination[train]

# Test predictors and labels: every row that was not drawn into `train`.
airbnb.simple.test <- subset(airbnb.simple[-train, ], select = -country_destination)
destination.test <- airbnb.simple$country_destination[-train]

Boosting

# Gradient boosting with the gbm package.
library(gbm)
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
# Fit a boosted model on the training half: Bernoulli loss for the 0/1
# target, 500 trees, depth-5 interactions and a 0.01 learning rate.
boost.airbnb <- gbm.fit(
  x = airbnb.simple.train,
  y = destination.train,
  distribution = "bernoulli",
  interaction.depth = 5,
  shrinkage = 0.01,
  n.trees = 500
)
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.2072             nan     0.0100    0.0001
##      2        1.2070             nan     0.0100    0.0001
##      3        1.2068             nan     0.0100    0.0001
##      4        1.2066             nan     0.0100    0.0001
##      5        1.2065             nan     0.0100    0.0001
##      6        1.2062             nan     0.0100    0.0001
##      7        1.2061             nan     0.0100    0.0001
##      8        1.2059             nan     0.0100    0.0001
##      9        1.2057             nan     0.0100    0.0001
##     10        1.2056             nan     0.0100    0.0001
##     20        1.2039             nan     0.0100    0.0001
##     40        1.2011             nan     0.0100    0.0000
##     60        1.1989             nan     0.0100    0.0000
##     80        1.1971             nan     0.0100    0.0000
##    100        1.1955             nan     0.0100    0.0000
##    120        1.1943             nan     0.0100    0.0000
##    140        1.1932             nan     0.0100    0.0000
##    160        1.1923             nan     0.0100    0.0000
##    180        1.1914             nan     0.0100    0.0000
##    200        1.1906             nan     0.0100    0.0000
##    220        1.1899             nan     0.0100   -0.0000
##    240        1.1892             nan     0.0100    0.0000
##    260        1.1886             nan     0.0100    0.0000
##    280        1.1880             nan     0.0100   -0.0000
##    300        1.1874             nan     0.0100    0.0000
##    320        1.1869             nan     0.0100   -0.0000
##    340        1.1865             nan     0.0100   -0.0000
##    360        1.1860             nan     0.0100    0.0000
##    380        1.1856             nan     0.0100   -0.0000
##    400        1.1851             nan     0.0100   -0.0000
##    420        1.1848             nan     0.0100   -0.0000
##    440        1.1844             nan     0.0100    0.0000
##    460        1.1839             nan     0.0100   -0.0000
##    480        1.1836             nan     0.0100   -0.0000
##    500        1.1832             nan     0.0100   -0.0000
# Score the held-out rows with all 500 trees; type = "response" returns
# values on the response (probability) scale for the Bernoulli model.
boost.pred <- predict(
  boost.airbnb,
  newdata = airbnb.simple.test,
  type = "response",
  n.trees = 500
)

# Confusion table: rows are the predicted class (1 when the predicted value
# exceeds 0.5, otherwise 0), columns are the actual test labels.
table(as.numeric(boost.pred > 0.5), destination.test)
##    destination.test
##         0     1
##   0 19789  8180
##   1    13    14