random forest

# Switch the session's working directory, then load the cleaned data set
# from it; the first CSV column is used as the row names.
# NOTE(review): the directory path ends in ".csv" -- confirm this really is
# a folder and not a mistyped file path.
setwd(
  "C:/Users/linye/Desktop/Tandon2016Fall/MachineLearning/project1/train_users_2.csv"
)
airbnb.simple <- read.csv(file = "cleaned_simple.csv", row.names = 1)

Randomly select rows as training data

# Split the rows into a training half and a held-out test half.
# The fixed seed keeps the split reproducible between runs.
set.seed(3)
# sample.int() avoids the `1:nrow()` pitfall on an empty data frame, and
# integer division makes the rounding-down of an odd row count explicit
# (previously sample() silently truncated the fractional size). The drawn
# indices are the same as before for the same seed.
train <- sample.int(nrow(airbnb.simple), nrow(airbnb.simple) %/% 2)
# Test predictors: the held-out rows without the response column.
airbnb.simple.test <- subset(airbnb.simple[-train, ],
                             select = -country_destination)
# True labels of the held-out rows, compared against predictions later.
destination.test <- airbnb.simple$country_destination[-train]

decision tree

library(tree)
# Fit a classification tree for country_destination on the training rows.
# `.` already stands for every column except the response, so the former
# `- country_destination` term was redundant and has been dropped.
tree.airbnb.simple <- tree(country_destination ~ .,
                           data = airbnb.simple,
                           subset = train)
# Print the fitted tree; in the recorded run it consists of the root node only.
tree.airbnb.simple

## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
## 1) root 27995 33800 US ( 0.2918 0.7082 ) *

# Predict a class label for every held-out row with the fitted tree.
tree.pred <- predict(
  object = tree.airbnb.simple,
  newdata = airbnb.simple.test,
  type = "class"
)

# Confusion table: predicted labels (rows) against true labels (columns).
table(tree.pred = tree.pred, destination.test = destination.test)

##          destination.test
## tree.pred non_US    US
##    non_US      0     0
##    US       8194 19802

random forest

library(randomForest)

## randomForest 4.6-12

## Type rfNews() to see new features/changes/bug fixes.

# Fit a random forest for country_destination on the training rows.
# `.` already excludes the response, so the redundant
# `- country_destination` term is dropped from the formula.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned; importance = TRUE asks the forest to record variable
# importance measures.
rf.airbnb <- randomForest(country_destination ~ .,
                          data = airbnb.simple,
                          subset = train,
                          importance = TRUE)

# Predict a class label for every held-out row with the fitted forest.
rf.pred <- predict(
  object = rf.airbnb,
  newdata = airbnb.simple.test,
  type = "class"
)
# Confusion table: predicted labels (rows) against true labels (columns).
table(rf.pred = rf.pred, destination.test = destination.test)

##         destination.test
## rf.pred  non_US    US
##   non_US    196   256
##   US       7998 19546

random forest

Ye Lin

October 11, 2016