# Load required packages.
library(readxl)
library(dplyr)
library(seasonal)
library(ggplot2)
library(forecast)
library(tidyverse)
library(tsibble)
library(feasts)
library(lubridate)
library(tsibbledata)
library(fable)
library(zoo)
library(fst)
# ---- Load and reshape the raw review data ----
d <- "data"
rData <- file.path(d)
# One-off conversion of the original CSV export to fst, kept for reference:
# data.raw <- read.csv(file.path(rData, "bumble_google_play_reviews.csv"), stringsAsFactors = TRUE)
# write_fst(data.raw, file.path(rData, "bumble_google_play_reviews.fst"))
data.raw <- read_fst(file.path(rData, "bumble_google_play_reviews.fst"))

# Sort by the parsed review date, then keep raw columns 1, 2, 5 and 8
# (selected by position, so this depends on the layout of the fst file).
data.filter <- data.raw %>% arrange(ymd(at))
data.filter <- data.filter[, c(1, 2, 5, 8)]

# Running index in sorted order, plus the calendar parts of the review date.
data.filter$reviewId <- seq_len(nrow(data.filter))
data.filter$day <- mday(data.filter$at)
data.filter$month <- month(data.filter$at)
data.filter$year <- year(data.filter$at)

# Reorder by position so that the derived day, month and year columns sit in
# positions 3-5 (assuming reviewId is one of the four retained columns).
data <- data.filter[, c(1, 2, 5, 6, 7, 3, 4)]

# The per-review rows provide no useful metric for time-series analysis on
# their own; the score histogram is only a quick sanity check.
hist(data$score)
#
## need to create bins that count number of reviews for each day.
#
##
# == FUNCTION 1 == #
#
nmo <- function(mo, n.mo) {
  # Month picker: hands back `n.mo` when the two months agree and `mo` when
  # they differ, so the result always equals `mo` in value.
  months.agree <- n.mo == mo
  if (months.agree) {
    n.mo
  } else {
    mo
  }
}
##
# == FUNCTION 2 == #
#
nyr <- function(yr, n.yr) {
  # Year picker: hands back `n.yr` when the two years agree and `yr` when
  # they differ, so the result always equals `yr` in value.
  years.agree <- n.yr == yr
  if (years.agree) {
    n.yr
  } else {
    yr
  }
}
##
# == FUNCTION 3 == #
#
#' Count reviews per calendar day, including days without any review.
#'
#' @param data Data frame with one row per review and numeric `day`, `month`
#'   and `year` columns. Row order does not matter.
#' @return Data frame with one row per calendar day from the earliest to the
#'   latest review date (inclusive) and the columns `ID` (running index),
#'   `NumReviews` (reviews on that day, 0 for days without reviews), `Day`,
#'   `Month`, `Year` and `Date`. A zero-row frame with the same columns is
#'   returned when there are no usable review dates.
bumbles.binner <- function(data) {
  review.dates <- as.Date(
    paste(data$year, data$month, data$day, sep = "-"),
    "%Y-%m-%d"
  )

  # Rows whose day/month/year do not form a real date cannot be binned.
  unparsed <- is.na(review.dates)
  if (any(unparsed)) {
    warning(
      sum(unparsed), " review(s) with an invalid date were ignored",
      call. = FALSE
    )
    review.dates <- review.dates[!unparsed]
  }

  if (length(review.dates) == 0) {
    return(data.frame(
      ID = numeric(0),
      NumReviews = numeric(0),
      Day = numeric(0),
      Month = numeric(0),
      Year = numeric(0),
      Date = as.Date(character(0))
    ))
  }

  # Every calendar day in the observed range, so the series has no gaps and
  # month/year boundaries are handled by the Date arithmetic itself.
  first.day <- min(review.dates)
  all.days <- seq(from = first.day, to = max(review.dates), by = "day")

  # Position of each review within all.days (1 = first.day), then a count per
  # position; positions that never occur get 0.
  day.index <- as.integer(unclass(review.dates) - unclass(first.day)) + 1L
  n.reviews <- tabulate(day.index, nbins = length(all.days))

  data.frame(
    ID = as.numeric(seq_along(all.days)),
    NumReviews = as.numeric(n.reviews),
    Day = as.numeric(format(all.days, "%d")),
    Month = as.numeric(format(all.days, "%m")),
    Year = as.numeric(format(all.days, "%Y")),
    Date = all.days
  )
}
#
#
##
bins.all <- bumbles.binner(data)

# Daily review counts as a base `ts` with a yearly period. The start is taken
# from the first binned date (year, day-of-year) instead of being hard-coded,
# so the time axis matches the data.
first.day <- bins.all$Date[1]
myts <- ts(
  bins.all$NumReviews,
  frequency = 365,
  start = c(year(first.day), yday(first.day))
)
tsdisplay(myts)
autoplot(myts) +
  labs(
    y = "Number of Reviews",
    x = "Date",
    title = "Bumble Ratings"
  )

# tsibble copy of the bins. NOTE: the index column is called `Month` but it
# holds the daily `Date` values (it overwrites the numeric Month column).
data.ts <- bins.all %>%
  mutate(Month = Date) %>%
  as_tsibble(index = Month)

### Additive decomposition ----
data.ts %>%
  model(
    classical_decomposition(NumReviews, type = "additive")
  ) %>%
  components() %>%
  autoplot()
dec1 <- decompose(myts, type = "additive")
plot(as.ts(dec1$trend))
plot(as.ts(dec1$seasonal))
plot(as.ts(dec1$random))

### Multiplicative decomposition ----
data.ts %>%
  model(
    classical_decomposition(NumReviews, type = "multiplicative")
  ) %>%
  components() %>%
  autoplot()
dec2 <- decompose(myts, type = "multiplicative")
plot(as.ts(dec2$trend))
plot(as.ts(dec2$seasonal))
plot(as.ts(dec2$random))
#