# Point the session at the project data folder, then load the cleaned data set.
# The first CSV column supplies the row names.
# NOTE(review): the working directory is a machine-specific absolute path whose
# last component looks like a file name ("train_users_2.csv") -- confirm it is
# really a directory on the target machine.
setwd("C:/Users/linye/Desktop/Tandon2016Fall/MachineLearning/project1/train_users_2.csv")
# stringsAsFactors is spelled out because its default flipped to FALSE in
# R 4.0.0; the classification models fitted on this data need the string
# columns (including the response) to be factors.
airbnb.simple <- read.csv(
  file = "cleaned_simple.csv",
  row.names = 1,
  stringsAsFactors = TRUE
)
# Randomly select rows as training data
# Reproducible train/test split: half of the rows (rounded down) are drawn at
# random for training; every remaining row is held out for testing.
set.seed(3)
train <- sample(
  x = seq_len(nrow(airbnb.simple)),        # seq_len() avoids the 1:0 pitfall
  size = floor(nrow(airbnb.simple) / 2)    # explicit round-down for odd counts
)
# Held-out predictors: test rows with the response column removed.
airbnb.simple.test <- subset(airbnb.simple[-train, ], select = -country_destination)
# True labels of the held-out rows, used to score the predictions.
destination.test <- airbnb.simple$country_destination[-train]
# Decision tree
library(tree)
# Fit a classification tree on the training rows, with every other column as a
# predictor. `.` already excludes the response, so no explicit
# `- country_destination` term is needed in the formula.
tree.airbnb.simple <- tree(
  country_destination ~ .,
  data = airbnb.simple,
  subset = train
)
# Auto-print the fitted tree; on this data it consists of a single (root) node.
tree.airbnb.simple
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 27995 33800 US ( 0.2918 0.7082 ) *
# Classify the held-out rows with the fitted tree, then cross-tabulate the
# predicted class (rows) against the true destination (columns).
tree.pred <- predict(
  object = tree.airbnb.simple,
  newdata = airbnb.simple.test,
  type = "class"
)
table(tree.pred, destination.test)
## destination.test
## tree.pred non_US US
## non_US 0 0
## US 8194 19802
# Random forest
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Fit a random forest on the training rows, with every other column as a
# predictor (`.` already excludes the response). `importance = TRUE` asks
# randomForest to also assess predictor importance; TRUE is spelled out
# because `T` is an ordinary, reassignable variable.
rf.airbnb <- randomForest(
  country_destination ~ .,
  data = airbnb.simple,
  subset = train,
  importance = TRUE
)
# Predict the class of each held-out row and cross-tabulate predictions (rows)
# against the true destination (columns).
rf.pred <- predict(rf.airbnb, newdata = airbnb.simple.test, type = "class")
table(rf.pred, destination.test)
## destination.test
## rf.pred non_US US
## non_US 196 256
## US 7998 19546