install.packages("tidyr")
install.packages("ggplot2")
install.packages("showtext")
install.packages("stringr")
install.packages("DT")
install.packages("plyr")
install.packages("readr")
install.packages("plyr")
# Library
library(tidyr)
library(ggplot2)
library(showtext)
library(stringr)
library(DT)
library(scales)
library(plyr)
library(readr)
從站長平台載下csv,統一放到資料夾中
# Set path
PATH <- "/Users/jesschen/Documents/訂房/R/searchengine"
setwd(PATH)
# Load in data & Add domain column
files_main = list.files(PATH, pattern="*_main.csv", full.names=TRUE)
dta1 = ldply(files_main, read.csv)
files_hotel = list.files(PATH, pattern="*_hotel.csv", full.names=TRUE)
dta2 = ldply(files_hotel, read.csv)
清理沒使用到的欄位、調整欄位(如:把字串換成日期格式)
# Merge data
data <- rbind(dta1, dta2)
# Clean unused columns
dta <- data[,colSums(is.na(data))<nrow(data)]
# Manage time columns
dta$CreateDate <- as.Date(gsub( " .*$", "", dta$CreateDate), format = "%Y/%m/%d")
dta$DepartureDateStart <- as.Date(gsub( " .*$", "", dta$DepartureDateStart), format = "%Y/%m/%d")
dta$DepartureDateEnd <- as.Date(gsub( " .*$", "", dta$DepartureDateEnd), format = "%Y/%m/%d")
# Manage Letter Cases
dta$WebLanguage <- tolower(dta$WebLanguage)
比如換算預訂日~入住日相差幾天
## Add columns
dta$Nights <- as.numeric(difftime(dta$DepartureDateEnd, dta$DepartureDateStart, units = "days"))
dta$PreBookDays <- as.numeric(difftime(dta$DepartureDateStart, dta$CreateDate, units = "days"))
dta$AdultChildCombine <- str_c(dta$AdultCount, "+", dta$ChildCount)
dta$CurrentPage <- ifelse(grepl("search", dta$CurrentUrl, ignore.case = T), "Search",
ifelse(grepl("detail", dta$CurrentUrl, ignore.case = T), "Detail",
ifelse(grepl("category", dta$CurrentUrl, ignore.case = T), "Category", "Other")))
dta$CurrentPageDomain <- str_c(dta$Domain, "/", dta$CurrentPage)
dta$Count <- 1
表格
datatable(as.data.frame(table(dta$DCityTxt)),
rownames = F,
colnames = c('目的地', 'Count'))
直方圖
ggplot(dta, aes(x = Platform, fill = Platform)) +
geom_bar() +
geom_text(stat='count', aes(label=..count..), vjust=0) +
labs(title="裝置占比", x="", y="")