1. Libraries

library(readxl)
library(dplyr)
library(seasonal)
library(ggplot2)
library(forecast)
library(tidyverse)
library(tsibble)
library(feasts)
library(lubridate)
library(tsibbledata)
library(fable)
library(zoo)
library(fst)

 
 

2. Data Prep

#
# Directory (relative to the working directory) that holds the input files.
d <- "data"
rData <- file.path(d)

# One-off conversion of the CSV export to fst; kept for reference only,
# because the .fst file is what is actually read below.
#data.raw <- read.csv(file.path(rData, "bumble_google_play_reviews.csv"), stringsAsFactors = TRUE)
#write_fst(data.raw, file.path(rData, "bumble_google_play_reviews.fst"))

data.raw <- read_fst(file.path(rData, "bumble_google_play_reviews.fst"))

# Order the reviews chronologically by their `at` value. The column is
# referenced through the pipe rather than by reaching back into `data.raw`.
data.filter <- data.raw %>% arrange(ymd(at))

# Keep raw columns 1, 2, 5 and 8. These must include `at` and `score`, which
# are referenced by name further down.
data.filter <- data.filter[, c(1, 2, 5, 8)]

# Replace/add `reviewId` with a running index in chronological order.
# seq_len() on the frame being modified stays correct for an empty frame,
# where 1:nrow() would produce c(1, 0).
data.filter$reviewId <- seq_len(nrow(data.filter))

# Split the review date into calendar parts for the daily binning step.
data.filter$day <- mday(data.filter$at)
data.filter$month <- month(data.filter$at)
data.filter$year <- year(data.filter$at)

# Reorder the columns. Assuming `reviewId` was already one of the four kept
# columns, this places day, month and year in columns 3-5 -- TODO confirm
# against the raw file's column layout.
data <- data.filter[, c(1, 2, 5, 6, 7, 3, 4)]

#
### current data provides no useful metric for time-series analysis
#
# Distribution of the per-review scores.
hist(data$score)

#
## need to create bins that count number of reviews for each day. 
#

##
# == FUNCTION 1 == #
#
# Choose the month value to carry forward when comparing two month values.
#
# Args:
#   mo:   first month value (scalar).
#   n.mo: second month value (scalar) that `mo` is compared against.
#
# Returns:
#   `mo` when the two values differ and `n.mo` when they are equal, so the
#   result always has the same value as `mo`. An NA in either argument makes
#   the comparison NA and the `if` raises an error.
nmo <- function(mo, n.mo) {
  months_match <- n.mo == mo
  if (months_match) {
    n.mo
  } else {
    mo
  }
}

##
# == FUNCTION 2 == #
#
# Choose the year value to carry forward when comparing two year values.
#
# Args:
#   yr:   first year value (scalar).
#   n.yr: second year value (scalar) that `yr` is compared against.
#
# Returns:
#   `yr` when the two values differ and `n.yr` when they are equal, so the
#   result always has the same value as `yr`. An NA in either argument makes
#   the comparison NA and the `if` raises an error.
nyr <- function(yr, n.yr) {
  years_match <- n.yr == yr
  if (years_match) {
    n.yr
  } else {
    yr
  }
}

##
# == FUNCTION 3 == #
#
# Count reviews per calendar day, with one output row for every day between
# the first and the last review (days without reviews get a count of 0).
#
# The counts are computed from full calendar dates rather than by walking the
# rows and comparing day-of-month values, so that:
#   * gaps that span a month or year boundary are filled,
#   * the same day number in two different months is never merged,
#   * the last day in the data is included,
#   * no row past the end of `data` is ever read.
# The input does not need to be sorted.
#
# Args:
#   data: data frame with one row per review and columns `day`, `month` and
#         `year` giving the review's calendar date. Other columns are ignored.
#
# Returns:
#   A data frame with columns
#     ID         - running index of the day (1 = first day),
#     NumReviews - number of reviews on that day,
#     Day, Month, Year - calendar parts of the day,
#     Date       - the day as a Date.
#   A zero-row data frame with the same columns is returned when `data`
#   contains no usable dates.
bumbles.binner <- function(data) {
  required_cols <- c("day", "month", "year")
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop("`data` is missing column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  review_dates <- as.Date(
    paste(data$year, data$month, data$day, sep = "-"),
    format = "%Y-%m-%d"
  )

  # Rows whose day/month/year do not form a valid date cannot be binned.
  unusable <- is.na(review_dates)
  if (any(unusable)) {
    warning("Dropping ", sum(unusable),
            " review(s) with an invalid day/month/year.", call. = FALSE)
    review_dates <- review_dates[!unusable]
  }

  if (length(review_dates) == 0L) {
    return(data.frame(
      ID = integer(0),
      NumReviews = integer(0),
      Day = integer(0),
      Month = integer(0),
      Year = integer(0),
      Date = as.Date(character(0))
    ))
  }

  # Every calendar day covered by the data, including days with no reviews.
  all_days <- seq(from = min(review_dates), to = max(review_dates), by = "day")

  # 1-based position of each review's day within `all_days`; tabulate() then
  # yields a count for every position, with 0 for days that never occur.
  day_index <- as.integer(review_dates) - as.integer(all_days[1L]) + 1L
  reviews_per_day <- tabulate(day_index, nbins = length(all_days))

  day_parts <- as.POSIXlt(all_days)

  data.frame(
    ID = seq_along(all_days),
    NumReviews = reviews_per_day,
    Day = day_parts$mday,
    Month = day_parts$mon + 1L,     # POSIXlt months are 0-based
    Year = day_parts$year + 1900L,  # POSIXlt years count from 1900
    Date = all_days
  )
}
#
#
##


# Collapse the per-review rows in `data` into daily bins via bumbles.binner();
# `bins.all` is the input for the time-series objects built below.
bins.all <- bumbles.binner(data)

 
 

3. Decomposition Prep

# Daily review counts as a base `ts` with a yearly period (365 observations
# per cycle). The start is derived from the first binned date as
# c(year, day-of-year) instead of being hard-coded, so the time axis lines up
# with the data whatever day the series actually begins on.
myts <- ts(bins.all$NumReviews,
           frequency = 365,
           start = c(lubridate::year(bins.all$Date[1]),
                     lubridate::yday(bins.all$Date[1])))

# Series plot together with its ACF and PACF.
tsdisplay(myts)

autoplot(myts) +
  labs(y = "Number of Reviews",
       x = "Date",
       title = "Bumble Ratings")

# tsibble version of the same daily bins for the fable/feasts models below.
# The index column is called `Month` but holds the daily `Date` values.
data.ts <- bins.all %>%
  mutate(Month = Date) %>%
  as_tsibble(index = Month)

 
 

4. Additive Model

# Classical additive decomposition of the daily review counts in the tsibble
# `data.ts`, with all extracted components plotted in one figure.
data.ts %>%
  model(
    classical_decomposition(NumReviews, type="additive")
  ) %>%
  components() %>%
  autoplot()

# The same additive decomposition on the base `ts` object `myts`, using
# decompose(); each component is then plotted separately.
dec1 <- decompose(myts, type="additive")
# Trend component.
plot(as.ts(dec1$trend))

# Seasonal component.
plot(as.ts(dec1$seasonal))

# Remainder (random) component.
plot(as.ts(dec1$random))

### ========================

 
 

5. Multiplicative Model

# Classical multiplicative decomposition of the daily review counts in the
# tsibble `data.ts`, with all extracted components plotted in one figure.
# NOTE(review): the binning step fills days without reviews with a count of 0;
# zero values may make a multiplicative decomposition degenerate -- confirm
# that the resulting components are meaningful.
data.ts %>%
  model(
    classical_decomposition(NumReviews, type="multiplicative")
  ) %>%
  components() %>%
  autoplot()

# The same multiplicative decomposition on the base `ts` object `myts`, using
# decompose(); each component is then plotted separately.
dec2 <- decompose(myts, type="multiplicative")
# Trend component.
plot(as.ts(dec2$trend))

# Seasonal component.
plot(as.ts(dec2$seasonal))

# Remainder (random) component.
plot(as.ts(dec2$random))

#