dates <- as.Date(Google_Trends_Combined_Feb_Mar_2018$Day, "%m/%d/%Y")
Google_Trends_Combined_Feb_Mar_2018["Day"] = dates
ggplot(data = Google_Trends_Combined_Feb_Mar_2018, aes(x = Day,y = PCOS_WW, group = 1)) +
geom_path() +
xlab("")
ggplot(data = Google_Trends_Combined_Feb_Mar_2018,aes(y = PCOS_WW))+
geom_boxplot(fill = "#f0421e") +
theme_minimal()
test <- grubbs.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW)
test
##
## Grubbs test for one outlier
##
## data: Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## G = 4.1706, U = 0.3566, p-value = 2.432e-06
## alternative hypothesis: highest value 100 is an outlier
test <- grubbs.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, opposite = TRUE)
test
##
## Grubbs test for one outlier
##
## data: Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## G = 1.56297, U = 0.90964, p-value = 1
## alternative hypothesis: lowest value 26 is an outlier
So both are outliers…
acf(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW)
Box.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, type="Ljung-Box")
##
## Box-Ljung test
##
## data: Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## X-squared = 4.4511, df = 1, p-value = 0.03488
Box test shows us that there is a non-stationary trend in out time series. (Low \(p\) value)
adf.test(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW)
##
## Augmented Dickey-Fuller Test
##
## data: Google_Trends_Combined_Feb_Mar_2018$PCOS_WW
## Dickey-Fuller = -2.2813, Lag order = 3, p-value = 0.4648
## alternative hypothesis: stationary
cptfn <- function(data, pen) {
ans <- cpt.mean(data, test.stat="Normal", method = "PELT", penalty = "Manual", pen.value = pen)
length(cpts(ans)) +1
}
# run cptfn for the signal with a known change point
pen.vals <- seq(0, 500,1)
elbowplotData <- unlist(lapply(pen.vals, function(p)
cptfn(data = Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, pen = p)))
plot(pen.vals,elbowplotData,
xlab = "PELT penalty parameter",
ylab = " ",
main = " ")
cptm_CP <- cpt.mean(Google_Trends_Combined_Feb_Mar_2018$PCOS_WW, penalty='Manual',pen.value=200,method='PELT')
cpts_CP <- cpts(cptm_CP) # change point time points
cpts_CP
## [1] 15 16 18
Google_Trends_Combined_Feb_Mar_2018[16,]
## # A tibble: 1 x 5
## Day PCOS_WW Polycystic_ovary_syndrom… PCOS_US Polycystic_ovary_syndrom…
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2018-02-27 100 100 100 100