Visualizations

dates <- as.Date(Google_Trends_Combined_Feb_Mar_2018$Day, "%m/%d/%Y")
Google_Trends_Combined_Feb_Mar_2018["Day"] = dates
ggplot(data = Google_Trends_Combined_Feb_Mar_2018, aes(x = Day,y = PCOS_WW, group = 1)) +
  geom_path() +
  xlab("") 

ggplot(data = Google_Trends_Combined_Feb_Mar_2018,aes(y = PCOS_WW))+
  geom_boxplot(fill = "#f0421e") +
  theme_minimal()

test <- grubbs.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW)
test
## 
##  Grubbs test for one outlier
## 
## data:  Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## G = 4.1706, U = 0.3566, p-value = 2.432e-06
## alternative hypothesis: highest value 100 is an outlier
test <- grubbs.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, opposite = TRUE)
test
## 
##  Grubbs test for one outlier
## 
## data:  Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## G = 1.56297, U = 0.90964, p-value = 1
## alternative hypothesis: lowest value 26 is an outlier

So both are outliers…

acf(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW)

Box.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## X-squared = 4.4511, df = 1, p-value = 0.03488

Box test shows us that there is a non-stationary trend in out time series. (Low \(p\) value)

adf.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## Dickey-Fuller = -2.2813, Lag order = 3, p-value = 0.4648
## alternative hypothesis: stationary
cptfn <- function(data, pen) {
  ans <- cpt.mean(data, test.stat="Normal", method = "PELT", penalty = "Manual", pen.value = pen) 
  length(cpts(ans)) +1
}
 

# run cptfn for the signal with a known change point
pen.vals <- seq(0, 500,1)
elbowplotData <- unlist(lapply(pen.vals, function(p) 
                  cptfn(data = Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, pen = p)))

plot(pen.vals,elbowplotData,
     xlab = "PELT penalty parameter",
     ylab = " ",
     main = " ")

cptm_CP         <- cpt.mean(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, penalty='Manual',pen.value=200,method='PELT') 
cpts_CP         <- cpts(cptm_CP) # change point time points
cpts_CP
## [1] 15 16 18
Google_Trends_Combined_Feb_Mar_2018[16,]
## # A tibble: 1 x 5
##   Day        PCOS_WW Polycystic_ovary_syndrom… PCOS_US Polycystic_ovary_syndrom…
##   <date>       <dbl>                     <dbl>   <dbl>                     <dbl>
## 1 2018-02-27     100                       100     100                       100