Preprocess the data

library(tidyr)

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# August ----
# Read the August sheet from the KiwiMotel folder of the repository.
d1 <- read.csv("https://raw.githubusercontent.com/tuyenhavan/Statistics/KiwiMotel/August.csv")

# Room rows: every row except row 26, which this script treats as the daily
# rate row (handled separately below).
d2 <- d1[-26, ]

# Reshape to long form: one row per room and day. The day columns are picked
# by name pattern instead of a hard-coded X1:X31 range, so the number of days
# in the sheet does not have to be spelled out. gather() stacks the day
# columns one after another, which keeps the rows ordered by day.
d2 <- d2 %>%
  tidyr::gather(
    Days, Status,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

head(d2)
##    Rooms        Types Days Status
## 1 Unit 1 King Studios   X1      0
## 2 Unit 2 King Studios   X1      0
## 3 Unit 3 King Studios   X1      0
## 4 Unit 4 King Studios   X1      0
## 5 Unit 5 King Studios   X1      1
## 6 Unit 6 King Studios   X1      1

# Rate row: reshape row 26 the same way, then keep only the day/rate pair,
# selected by name rather than by column position.
d_rate <- d1[26, ]

d_rate <- d_rate %>%
  tidyr::gather(
    Days, Rate,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

d_rate <- d_rate[, c("Days", "Rate")]

head(d_rate)
##   Days Rate
## 1   X1   32
## 2   X2   52
## 3   X3   32
## 4   X4   36
## 5   X5   44
## 6   X6   52

# Attach each day's rate to every room row of that day.
d_Aug <- full_join(d2, d_rate, by = "Days")

# Drop the leading "X" of the column-derived day labels ("X1" -> "1");
# presumably read.csv() added it to the numeric headers.
d_Aug$Days <- sub("^X", "", d_Aug$Days)

d_Aug$Month <- "August"

head(d_Aug)
##    Rooms        Types Days Status Rate  Month
## 1 Unit 1 King Studios    1      0   32 August
## 2 Unit 2 King Studios    1      0   32 August
## 3 Unit 3 King Studios    1      0   32 August
## 4 Unit 4 King Studios    1      0   32 August
## 5 Unit 5 King Studios    1      1   32 August
## 6 Unit 6 King Studios    1      1   32 August
August <- d_Aug

head(August)
##    Rooms        Types Days Status Rate  Month
## 1 Unit 1 King Studios    1      0   32 August
## 2 Unit 2 King Studios    1      0   32 August
## 3 Unit 3 King Studios    1      0   32 August
## 4 Unit 4 King Studios    1      0   32 August
## 5 Unit 5 King Studios    1      1   32 August
## 6 Unit 6 King Studios    1      1   32 August
# September ----
# Read the September sheet from the KiwiMotel folder of the repository.
d1 <- read.csv("https://raw.githubusercontent.com/tuyenhavan/Statistics/KiwiMotel/September.csv")

# Room rows: every row except row 26, which this script treats as the daily
# rate row (handled separately below).
d2 <- d1[-26, ]

# Reshape to long form: one row per room and day. The day columns are picked
# by name pattern instead of a hard-coded X1:X30 range, so the number of days
# in the sheet does not have to be spelled out. gather() stacks the day
# columns one after another, which keeps the rows ordered by day.
d2 <- d2 %>%
  tidyr::gather(
    Days, Status,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

head(d2)
##    Rooms        Types Days Status
## 1 Unit 1 King Studios   X1      1
## 2 Unit 2 King Studios   X1      1
## 3 Unit 3 King Studios   X1      1
## 4 Unit 4 King Studios   X1      1
## 5 Unit 5 King Studios   X1      1
## 6 Unit 6 King Studios   X1      0

# Rate row: reshape row 26 the same way, then keep only the day/rate pair,
# selected by name rather than by column position.
d_rate <- d1[26, ]

d_rate <- d_rate %>%
  tidyr::gather(
    Days, Rate,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

d_rate <- d_rate[, c("Days", "Rate")]

head(d_rate)
##   Days Rate
## 1   X1   92
## 2   X2   60
## 3   X3   88
## 4   X4   16
## 5   X5   40
## 6   X6   40

# Attach each day's rate to every room row of that day.
d_Sep <- full_join(d2, d_rate, by = "Days")

# Drop the leading "X" of the column-derived day labels ("X1" -> "1");
# presumably read.csv() added it to the numeric headers.
d_Sep$Days <- sub("^X", "", d_Sep$Days)

d_Sep$Month <- "September"

head(d_Sep)
##    Rooms        Types Days Status Rate     Month
## 1 Unit 1 King Studios    1      1   92 September
## 2 Unit 2 King Studios    1      1   92 September
## 3 Unit 3 King Studios    1      1   92 September
## 4 Unit 4 King Studios    1      1   92 September
## 5 Unit 5 King Studios    1      1   92 September
## 6 Unit 6 King Studios    1      0   92 September
September <- d_Sep

head(September)
##    Rooms        Types Days Status Rate     Month
## 1 Unit 1 King Studios    1      1   92 September
## 2 Unit 2 King Studios    1      1   92 September
## 3 Unit 3 King Studios    1      1   92 September
## 4 Unit 4 King Studios    1      1   92 September
## 5 Unit 5 King Studios    1      1   92 September
## 6 Unit 6 King Studios    1      0   92 September
# October ----
# Read the October sheet from the KiwiMotel folder of the repository.
d1 <- read.csv("https://raw.githubusercontent.com/tuyenhavan/Statistics/KiwiMotel/October.csv")

# Room rows: every row except row 26, which this script treats as the daily
# rate row (handled separately below).
d2 <- d1[-26, ]

# Reshape to long form: one row per room and day. The day columns are picked
# by name pattern instead of a hard-coded X1:X31 range, so the number of days
# in the sheet does not have to be spelled out. gather() stacks the day
# columns one after another, which keeps the rows ordered by day.
d2 <- d2 %>%
  tidyr::gather(
    Days, Status,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

head(d2)
##    Rooms        Types Days Status
## 1 Unit 1 King Studios   X1      1
## 2 Unit 2 King Studios   X1      0
## 3 Unit 3 King Studios   X1      0
## 4 Unit 4 King Studios   X1      0
## 5 Unit 5 King Studios   X1      1
## 6 Unit 6 King Studios   X1      0

# Rate row: reshape row 26 the same way, then keep only the day/rate pair,
# selected by name rather than by column position.
d_rate <- d1[26, ]

d_rate <- d_rate %>%
  tidyr::gather(
    Days, Rate,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

d_rate <- d_rate[, c("Days", "Rate")]

head(d_rate)
##   Days Rate
## 1   X1   56
## 2   X2   16
## 3   X3   36
## 4   X4   24
## 5   X5   52
## 6   X6   48

# Attach each day's rate to every room row of that day.
d_Oct <- full_join(d2, d_rate, by = "Days")

# Drop the leading "X" of the column-derived day labels ("X1" -> "1");
# presumably read.csv() added it to the numeric headers.
d_Oct$Days <- sub("^X", "", d_Oct$Days)

d_Oct$Month <- "October"

head(d_Oct)
##    Rooms        Types Days Status Rate   Month
## 1 Unit 1 King Studios    1      1   56 October
## 2 Unit 2 King Studios    1      0   56 October
## 3 Unit 3 King Studios    1      0   56 October
## 4 Unit 4 King Studios    1      0   56 October
## 5 Unit 5 King Studios    1      1   56 October
## 6 Unit 6 King Studios    1      0   56 October
October <- d_Oct

head(October)
##    Rooms        Types Days Status Rate   Month
## 1 Unit 1 King Studios    1      1   56 October
## 2 Unit 2 King Studios    1      0   56 October
## 3 Unit 3 King Studios    1      0   56 October
## 4 Unit 4 King Studios    1      0   56 October
## 5 Unit 5 King Studios    1      1   56 October
## 6 Unit 6 King Studios    1      0   56 October
# November ----
# Read the November sheet from the KiwiMotel folder of the repository.
d1 <- read.csv("https://raw.githubusercontent.com/tuyenhavan/Statistics/KiwiMotel/November.csv")

# Room rows: every row except row 26, which this script treats as the daily
# rate row (handled separately below).
d2 <- d1[-26, ]

# Reshape to long form: one row per room and day. The day columns are picked
# by name pattern instead of a hard-coded X1:X30 range, so the number of days
# in the sheet does not have to be spelled out. gather() stacks the day
# columns one after another, which keeps the rows ordered by day.
d2 <- d2 %>%
  tidyr::gather(
    Days, Status,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

head(d2)
##    Rooms        Types Days Status
## 1 Unit 1 King Studios   X1      1
## 2 Unit 2 King Studios   X1      1
## 3 Unit 3 King Studios   X1      1
## 4 Unit 4 King Studios   X1      0
## 5 Unit 5 King Studios   X1      0
## 6 Unit 6 King Studios   X1      1

# Rate row: reshape row 26 the same way, then keep only the day/rate pair,
# selected by name rather than by column position.
d_rate <- d1[26, ]

d_rate <- d_rate %>%
  tidyr::gather(
    Days, Rate,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

d_rate <- d_rate[, c("Days", "Rate")]

head(d_rate)
##   Days Rate
## 1   X1   36
## 2   X2   36
## 3   X3   32
## 4   X4   44
## 5   X5   40
## 6   X6    8

# Attach each day's rate to every room row of that day.
d_Nov <- full_join(d2, d_rate, by = "Days")

# Drop the leading "X" of the column-derived day labels ("X1" -> "1");
# presumably read.csv() added it to the numeric headers.
d_Nov$Days <- sub("^X", "", d_Nov$Days)

d_Nov$Month <- "November"

November <- d_Nov

head(November)
##    Rooms        Types Days Status Rate    Month
## 1 Unit 1 King Studios    1      1   36 November
## 2 Unit 2 King Studios    1      1   36 November
## 3 Unit 3 King Studios    1      1   36 November
## 4 Unit 4 King Studios    1      0   36 November
## 5 Unit 5 King Studios    1      0   36 November
## 6 Unit 6 King Studios    1      1   36 November
# December ----
# Read the December sheet from the KiwiMotel folder of the repository.
d1 <- read.csv("https://raw.githubusercontent.com/tuyenhavan/Statistics/KiwiMotel/December.csv")

# Room rows: every row except row 26, which this script treats as the daily
# rate row (handled separately below).
d2 <- d1[-26, ]

# Reshape to long form: one row per room and day. The day columns are picked
# by name pattern instead of a hard-coded range, so the number of days in the
# sheet does not have to be spelled out. gather() stacks the day columns one
# after another, which keeps the rows ordered by day.
# NOTE(review): the range used here previously was X1:X30 although December
# has 31 days; the sheet appears to hold 30 day columns -- confirm with the
# data owner. If an X31 column is ever added, the pattern picks it up.
d2 <- d2 %>%
  tidyr::gather(
    Days, Status,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

head(d2)
##    Rooms        Types Days Status
## 1 Unit 1 King Studios   X1      1
## 2 Unit 2 King Studios   X1      0
## 3 Unit 3 King Studios   X1      1
## 4 Unit 4 King Studios   X1      0
## 5 Unit 5 King Studios   X1      1
## 6 Unit 6 King Studios   X1      0

# Rate row: reshape row 26 the same way, then keep only the day/rate pair,
# selected by name rather than by column position.
d_rate <- d1[26, ]

d_rate <- d_rate %>%
  tidyr::gather(
    Days, Rate,
    dplyr::matches("^X[0-9]+$", ignore.case = FALSE)
  )

d_rate <- d_rate[, c("Days", "Rate")]

head(d_rate)
##   Days Rate
## 1   X1   32
## 2   X2   28
## 3   X3   24
## 4   X4    8
## 5   X5   32
## 6   X6   40

# Attach each day's rate to every room row of that day.
d_Dec <- full_join(d2, d_rate, by = "Days")

# Drop the leading "X" of the column-derived day labels ("X1" -> "1");
# presumably read.csv() added it to the numeric headers.
d_Dec$Days <- sub("^X", "", d_Dec$Days)

d_Dec$Month <- "December"

head(d_Dec)
##    Rooms        Types Days Status Rate    Month
## 1 Unit 1 King Studios    1      1   32 December
## 2 Unit 2 King Studios    1      0   32 December
## 3 Unit 3 King Studios    1      1   32 December
## 4 Unit 4 King Studios    1      0   32 December
## 5 Unit 5 King Studios    1      1   32 December
## 6 Unit 6 King Studios    1      0   32 December
December <- d_Dec

head(December)
##    Rooms        Types Days Status Rate    Month
## 1 Unit 1 King Studios    1      1   32 December
## 2 Unit 2 King Studios    1      0   32 December
## 3 Unit 3 King Studios    1      1   32 December
## 4 Unit 4 King Studios    1      0   32 December
## 5 Unit 5 King Studios    1      1   32 December
## 6 Unit 6 King Studios    1      0   32 December
# Stack the five monthly tables on top of each other, August first.
monthly_tables <- list(August, September, October, November, December)
df <- do.call(rbind, monthly_tables)

head(df)
##    Rooms        Types Days Status Rate  Month
## 1 Unit 1 King Studios    1      0   32 August
## 2 Unit 2 King Studios    1      0   32 August
## 3 Unit 3 King Studios    1      0   32 August
## 4 Unit 4 King Studios    1      0   32 August
## 5 Unit 5 King Studios    1      1   32 August
## 6 Unit 6 King Studios    1      1   32 August

# Days arrives as text (the day label with its "X" removed) and Status as a
# number; store the former as numeric and the latter as a factor.
df <- df %>%
  mutate(
    Days = as.numeric(Days),
    Status = as.factor(Status)
  )

glimpse(df)
## Observations: 3,800
## Variables: 6
## $ Rooms  <fctr> Unit 1, Unit 2, Unit 3, Unit 4, Unit 5, Unit 6, Unit 7...
## $ Types  <fctr> King Studios, King Studios, King Studios, King Studios...
## $ Days   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ Status <fctr> 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, ...
## $ Rate   <int> 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,...
## $ Month  <chr> "August", "August", "August", "August", "August", "Augu...
# Boxplot

library(ggplot2)

# Distribution of Rate per month. Month is a character column, which ggplot2
# would lay out alphabetically on the axis; converting it to a factor whose
# levels follow their order of appearance in df keeps the months in the order
# in which the monthly tables were stacked.
ggplot(
  data = df,
  aes(x = factor(Month, levels = unique(Month)), y = Rate)
) +
  geom_boxplot(color = 4, fill = 3) +
  labs(x = "Month")

August

# Line plot and boxplot

# August

# Restrict the combined table to the August rows.
august_rows <- df[["Month"]] == "August"
df1 <- df[august_rows, ]

# Rate against day of the month, drawn as a line.
ggplot(df1, aes(x = Days, y = Rate)) +
  geom_line(colour = 4)

# Rate split by Status, one filled box per status level.
ggplot(df1, aes(x = Status, y = Rate, fill = Status)) +
  geom_boxplot()

# Rate against day of the month for every month in df. Each day number occurs
# once per month, so without a grouping the line would zig-zag between the
# different months' rates at the same x position. Mapping colour to Month
# draws one separately coloured line per month instead.
ggplot(data = df, aes(x = Days, y = Rate, colour = Month)) +
  geom_line()