Code examples are from https://www.r-bloggers.com/anomaly-detection-in-r-2/ written by the team at Perceptive Analytics: Madhur Modi, Prudhvi Potuganti, Saneesh Veetil and Chaitanya Sagar.

Check whether the required packages are already installed and, if not, install them. First install the devtools package, then use it to install the GitHub packages.

The wikipediatrend package provides an API for accessing Wikipedia page-view trend data for any page on Wikipedia. The AnomalyDetection package detects anomalies in seasonal univariate time series.

# Install any missing CRAN packages; report the ones that are already present.
neededPackages <- c("devtools", "Rcpp", "ggplot2")
for (pkg in neededPackages) {
  # Compare against package names only. installed.packages() returns a matrix,
  # and its other columns (Imports, LinkingTo, ...) can hold the same strings.
  # It is re-read on every pass so dependencies pulled in by an earlier
  # install are seen as installed.
  alreadyInstalled <- pkg %in% rownames(installed.packages())
  # Plain if/else instead of ifelse(): ifelse() is a vectorised selector and
  # raises "replacement has length zero" when the chosen branch yields NULL,
  # which is what install.packages() returns.
  if (alreadyInstalled) {
    print(paste(pkg, "is already installed."))
  } else {
    install.packages(pkg)
  }
}
[1] "devtools is already installed."
[1] "Rcpp is already installed."
[1] "ggplot2 is already installed."
library(devtools)
# Install any missing GitHub-hosted packages; report the ones already present.
# gitHubPackages[i] is the package name and gitHubLoction[i] the matching
# "owner/repo" path passed to install_github().
gitHubPackages <- c("wikipediatrend", "AnomalyDetection")
gitHubLoction <- c("petermeissner/wikipediatrend", "twitter/AnomalyDetection")
for (i in seq_along(gitHubPackages)) {
  # Match on package names only (the row names of the installed.packages()
  # matrix) rather than on every cell of the matrix.
  alreadyInstalled <- gitHubPackages[i] %in% rownames(installed.packages())
  # Plain if/else: ifelse() is a vectorised selector, not a control-flow
  # construct for side effects such as installing a package.
  if (alreadyInstalled) {
    print(paste(gitHubPackages[i], "is already installed."))
  } else {
    install_github(gitHubLoction[i])
  }
}
[1] "wikipediatrend is already installed."
[1] "AnomalyDetection is already installed."
library(Rcpp)
library(wikipediatrend)
library(AnomalyDetection)

Download the trends data from the English Wikipedia page for President Michael D. Higgins, "Michael_D._Higgins", and save it in the variable higginsWikipedia. View the first 6 rows.

# Fetch page-view data for the "Michael_D._Higgins" article on English
# Wikipedia, starting from 2018-01-01, then preview the first rows.
higginsWikipedia <- wp_trend(
  "Michael_D._Higgins",
  from = "2018-01-01",
  lang = "en"
)
head(higginsWikipedia)
  project   language article            access     agent      granularity date       views
1 wikipedia en       Michael_D._Higgins all-access all-agents daily       2018-01-01 785  
2 wikipedia en       Michael_D._Higgins all-access all-agents daily       2018-01-02 773  
3 wikipedia en       Michael_D._Higgins all-access all-agents daily       2018-01-03 657  
4 wikipedia en       Michael_D._Higgins all-access all-agents daily       2018-01-04 668  
5 wikipedia en       Michael_D._Higgins all-access all-agents daily       2018-01-05 622  
6 wikipedia en       Michael_D._Higgins all-access all-agents daily       2018-01-06 689  

Plot the data using ggplot2. Set the x-axis to the Date and the y-axis to the number of views.

library(ggplot2)
# Line chart of views over time; the line colour is also mapped to the view
# count.
ggplot(
  data = higginsWikipedia,
  mapping = aes(x = date, y = views, color = views)
) +
  geom_line()

Prepare the data for the AnomalyDetection package by keeping only the date and page views and discarding all other variables. The input must be a series of (timestamp, count) pairs. Check the structure of the higginsWikipedia data frame. The date column must be in POSIXct format.

# Reduce the data to the two columns used from here on, the timestamp and the
# view count, then inspect the resulting structure.
columns_to_keep <- c("date", "views")
higginsWikipedia <- higginsWikipedia[, columns_to_keep]
str(higginsWikipedia)
Classes ‘wp_df’ and 'data.frame':   350 obs. of  2 variables:
 $ date : POSIXct, format: "2018-01-01" "2018-01-02" "2018-01-03" ...
 $ views: num  785 773 657 668 622 ...

AnomalyDetection Method

Apply anomaly detection and plot the results.

# Run AnomalyDetectionTs on the (date, views) data, asking for anomalies in
# the positive direction and for a plot to be built alongside the results.
AnomalyDetectionHiggins <- AnomalyDetectionTs(
  higginsWikipedia,
  direction = "pos",
  plot = TRUE,
  title = "Anomaly Detection"
)
# Display the plot stored in the result.
AnomalyDetectionHiggins$plot

Look at the dates on which the anomalies occurred.

# Timestamps of the detected anomalies. The detection result was stored in
# AnomalyDetectionHiggins; no object named anomaliesHiggins is ever created.
AnomalyDetectionHiggins$anoms$timestamp

Anomalize Method

Install anomalize package.

#install.packages('anomalize')
#Update from github
#library(devtools)
#install_github("business-science/anomalize")
#Load the package
library(anomalize)
# We will also use the tidyverse package for data processing (the original article additionally used coindeskr to get bitcoin data; it is not needed here)
library(tidyverse)

Decompose the data using the time_decompose() function in the anomalize package. We will use the STL method, which extracts the seasonal component.

# Convert the data to a tibble for use with anomalize. as_tibble() replaces
# the deprecated as.tibble(), which emits a deprecation warning.
higginsWikipedia_ts <- higginsWikipedia %>%
  as_tibble()

# Decompose the views with the "stl" method (frequency and trend spans chosen
# automatically), flag anomalies in the remainder with the "gesd" method, and
# plot the decomposition.
higginsWikipedia_ts %>%
  time_decompose(views, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.1) %>%
  plot_anomaly_decomposition()
Converting from tbl_df to tbl_time.
Auto-index message: index = date
frequency = 7 days
trend = 90.5 days

Decompose the data using the time_decompose() function in the anomalize package. We will use the STL method, which extracts the seasonal component.

# Decompose the views ("stl" method, automatic frequency and trend), flag
# anomalies in the remainder ("gesd" method), and plot each component.
higginsWikipedia_ts %>%
  time_decompose(
    views,
    method = "stl",
    frequency = "auto",
    trend = "auto"
  ) %>%
  anomalize(
    remainder,
    method = "gesd",
    alpha = 0.05,
    max_anoms = 0.1
  ) %>%
  plot_anomaly_decomposition()
Converting from tbl_df to tbl_time.
Auto-index message: index = date
frequency = 7 days
trend = 90.5 days

Plot the data again, this time after recomposing it.

# Decompose the views, flag anomalies in the remainder, recompose the
# anomaly bounds, and plot the observed series with its anomalies.
# The chunk previously stopped after time_decompose() with a dangling pipe,
# which is a syntax error; the remaining steps complete the pipeline.
higginsWikipedia_ts %>%
  time_decompose(views) %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)

Extract the anomalies

# Run the decompose -> anomalize -> recompose pipeline with default settings
# and keep only the rows whose anomaly column equals 'Yes'.
anomalizeHiggins <- higginsWikipedia_ts %>%
  time_decompose(views) %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  filter(anomaly == 'Yes')
Converting from tbl_df to tbl_time.
Auto-index message: index = date
frequency = 7 days
trend = 90.5 days
# Print the rows that were flagged as anomalies.
anomalizeHiggins
# A time tibble: 36 x 10
# Index: date
   date                observed season trend remainder remainder_l1 remainder_l2 anomaly recomposed_l1
   <dttm>                 <dbl>  <dbl> <dbl>     <dbl>        <dbl>        <dbl> <chr>           <dbl>
 1 2018-01-15 00:00:00   10627    59.7  851.     9716.       -1288.        1459. Yes             -377.
 2 2018-01-16 00:00:00   12142    39.8  850.    11252.       -1288.        1459. Yes             -398.
 3 2018-01-17 00:00:00    4009   -15.1  850.     3174.       -1288.        1459. Yes             -453.
 4 2018-01-29 00:00:00    3260.   59.7  845.     2356.       -1288.        1459. Yes             -384.
 5 2018-02-10 00:00:00    2773   -46.1  842.     1977.       -1288.        1459. Yes             -492.
 6 2018-02-21 00:00:00    2364   -15.1  830.     1549.       -1288.        1459. Yes             -473.
 7 2018-03-10 00:00:00    3682   -46.1  845.     2883.       -1288.        1459. Yes             -489.
 8 2018-03-17 00:00:00    2307   -46.1  855.     1498.       -1288.        1459. Yes             -479.
 9 2018-05-26 00:00:00    2972   -46.1  793.     2225.       -1288.        1459. Yes             -541.
10 2018-07-10 00:00:00    5597    39.8  831.     4726.       -1288.        1459. Yes             -417.
# ... with 26 more rows, and 1 more variable: recomposed_l2 <dbl>
LS0tDQp0aXRsZTogIkFub21hbHkgRGV0ZWN0aW9uIE5vdGVib29rIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCkNvZGUgZXhhbXBsZXMgYXJlIGZyb20gaHR0cHM6Ly93d3cuci1ibG9nZ2Vycy5jb20vYW5vbWFseS1kZXRlY3Rpb24taW4tci0yLyB3cml0dGVuIGJ5IHRoZSB0ZWFtIGF0IFBlcmNlcHRpdmUgQW5hbHl0aWNzOiBNYWRodXIgTW9kaSwgUHJ1ZGh2aSBQb3R1Z2FudGksIFNhbmVlc2ggVmVldGlsIGFuZCBDaGFpdGFueWEgU2FnYXIuDQoNCg0KQ2hlY2sgaWYgdGhlIHBhY2thZ2VzIG5lZWRlZCBhcmUgYWxyZWFkeSBpbnN0YWxsZWQgYW5kIGlmIG5vdCBpbnN0YWxsIHRoZW0uDQpGaXJzdCBpbnN0YWxsIHRoZSBkZXZ0b29scyBwYWNrYWdlLiBUaGVuIGluc3RhbGwgdGhlIGdpdGh1YiBwYWNrYWdlcy4NCg0KVGhlICoqd2lraXBlZGlhdHJlbmQqKiBwYWNrYWdlIGNvbnRhaW5zIHRoZSBBUEkgdG8gYWNjZXNzIHdpa2lwZWRpYSB0cmVuZHMgZGF0YSBvbiBhbnkgcGFnZSBpbiBXaWtpcGVkaWEuDQpUaGUgKipBbm9tYWx5RGV0ZWN0aW9uKiogcGFja2FnZSBkZXRlY3RzIGFub21hbGllcyBpbiBzZWFzb25hbCB1bml2YXJpYXRlIHRpbWUgc2VyaWVzLiANCg0KYGBge3Igd2FybmluZyA9IEZBTFNFfQ0KbmVlZGVkUGFja2FnZXMgPSBjKCJkZXZ0b29scyIsICJSY3BwIiwgImdncGxvdDIiKQ0KDQpmb3IgKGkgaW4gMTpsZW5ndGgobmVlZGVkUGFja2FnZXMpKSB7DQogICAgaWZlbHNlKCFuZWVkZWRQYWNrYWdlc1tpXSAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygpLCBpbnN0YWxsLnBhY2thZ2VzKG5lZWRlZFBhY2thZ2VzW2ldKSwgDQogICAgcHJpbnQocGFzdGUobmVlZGVkUGFja2FnZXNbaV0sICJpcyBhbHJlYWR5IGluc3RhbGxlZC4iKSkpDQp9DQoNCmBgYA0KDQoNCmBgYHtyIHdhcm5pbmc9RkFMU0V9DQpsaWJyYXJ5KGRldnRvb2xzKQ0KZ2l0SHViUGFja2FnZXMgPSBjKCJ3aWtpcGVkaWF0cmVuZCIsICJBbm9tYWx5RGV0ZWN0aW9uIikNCmdpdEh1YkxvY3Rpb24gPSBjKCJwZXRlcm1laXNzbmVyL3dpa2lwZWRpYXRyZW5kIiwgInR3aXR0ZXIvQW5vbWFseURldGVjdGlvbiIpDQoNCmZvciAoaSBpbiAxOmxlbmd0aChnaXRIdWJQYWNrYWdlcykpIHsNCiAgICBpZmVsc2UoIWdpdEh1YlBhY2thZ2VzW2ldICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCksIGluc3RhbGxfZ2l0aHViKGdpdEh1YkxvY3Rpb25baV0pLCANCiAgICBwcmludChwYXN0ZShnaXRIdWJQYWNrYWdlc1tpXSwgImlzIGFscmVhZHkgaW5zdGFsbGVkLiIpKSkNCn0NCg0KbGlicmFyeShSY3BwKQ0KbGlicmFyeSh3aWtpcGVkaWF0cmVuZCkNCmxpYnJhcnkoQW5vbWFseURldGVjdGlvbikNCg0KYGBgDQoNCg0KRG93bmxvYWQgdGhlIHRyZW5kcyBkYXRhIGZyb20gdGhlIEVuZ2xpc2ggV2lraXBlZGlhIHdlYnBhZ2UgZm9yIFByZXNpZGVudCBNaWNoYWVsIEQuIEhpZ2dpbnMgIk1pY2hhZWxfRC5fSGlnZ2lucyIgYW5kIHNhdmUgaXQgYXMgdGhlIHZhcmlhYmxl
IGhpZ2dpbnNXaWtpcGVkaWEuDQpWaWV3IHRoZSBmaXJzdCA2IHJvd3MuDQoNCmBgYHtyfQ0KaGlnZ2luc1dpa2lwZWRpYSA9IHdwX3RyZW5kKCJNaWNoYWVsX0QuX0hpZ2dpbnMiLCBmcm9tPSIyMDE4LTAxLTAxIiwgbGFuZyA9ICJlbiIpDQpoZWFkKGhpZ2dpbnNXaWtpcGVkaWEpDQoNCmBgYA0KDQpQbG90IHRoZSBkYXRhIHVzaW5nIGdncGxvdDIuIFNldCB0aGUgeC1heGlzIHRvIHRoZSBEYXRlIGFuZCB0aGUgeS1heGlzIHRvIHRoZSBudW1iZXIgb2Ygdmlld3MuDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCg0KZ2dwbG90KGhpZ2dpbnNXaWtpcGVkaWEsIGFlcyh4PWRhdGUsIHk9dmlld3MsIGNvbG9yPXZpZXdzKSkgKyANCiAgZ2VvbV9saW5lKCkNCg0KYGBgDQpQcmVwYXJlIHRoZSBkYXRhIGZvciB0aGUgQW5vbWFseURldGVjdGlvbiBwYWNrYWdlIGJ5IGtlZXBpbmcgb25seSB0aGUgZGF0ZSBhbmQgcGFnZSB2aWV3cyBhbmQgZGlzY2FyZCBhbGwgb3RoZXIgdmFyaWFibGVzLiAgVGhlIGlucHV0IG11c3QgYmUgYSBzZXJpZXMgb2YgPHRpbWVzdGFtcCwgY291bnQ+IHBhaXJzLiBDaGVjayB0aGUgc3RydWN0dXJlIG9mIHRoZSBoaWdnaW5zV2lraXBlZGlhIGRhdGFmcmFtZS4gVGhlIGRhdGUgY29sdW1uIG11c3QgYmUgaW4gUE9TSVhjdCBmb3JtYXQuDQoNCmBgYHtyfQ0KY29sdW1uc190b19rZWVwPWMoImRhdGUiLCJ2aWV3cyIpDQpoaWdnaW5zV2lraXBlZGlhPWhpZ2dpbnNXaWtpcGVkaWFbLGNvbHVtbnNfdG9fa2VlcF0NCnN0cihoaWdnaW5zV2lraXBlZGlhKQ0KYGBgDQoNCiMjQW5vbWFseURldGVjdGlvbiBNZXRob2QNCkFwcGx5IGFub21hbHkgZGV0ZWN0aW9uIGFuZCBwbG90IHRoZSByZXN1bHRzLg0KDQpgYGB7cn0NCkFub21hbHlEZXRlY3Rpb25IaWdnaW5zID0gQW5vbWFseURldGVjdGlvblRzKGhpZ2dpbnNXaWtpcGVkaWEsIGRpcmVjdGlvbj0icG9zIiwgcGxvdD1UUlVFLCB0aXRsZSA9ICJBbm9tYWx5IERldGVjdGlvbiIpDQpBbm9tYWx5RGV0ZWN0aW9uSGlnZ2lucyRwbG90DQoNCmBgYA0KTG9vayBhdCB0aGUgZGF0ZXMgdGhlIGFub21hbGllcyBvY2N1cmVkLg0KYGBge3J9DQphbm9tYWxpZXNIaWdnaW5zJGFub21zJHRpbWVzdGFtcA0KYGBgDQoNCiMjQW5vbWFsaXplIE1ldGhvZA0KDQpJbnN0YWxsIGFub21hbGl6ZSBwYWNrYWdlLg0KYGBge3J9DQojaW5zdGFsbC5wYWNrYWdlcygnYW5vbWFsaXplJykNCiNVcGRhdGUgZnJvbSBnaXRodWINCiNsaWJyYXJ5KGRldnRvb2xzKQ0KI2luc3RhbGxfZ2l0aHViKCJidXNpbmVzcy1zY2llbmNlL2Fub21hbGl6ZSIpDQojTG9hZCB0aGUgcGFja2FnZQ0KbGlicmFyeShhbm9tYWxpemUpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmBgYA0KDQoNCkRlY29tcG9zZSBkYXRhIHVzaW5nIHRpbWVfZGVjb21wb3NlKCkgZnVuY3Rpb24gaW4gYW5vbWFsaXplIHBhY2thZ2UuIFdlIHdpbGwgdXNlIHN0bCBtZXRob2Qgd2hpY2ggZXh0cmFjdHMgc2Vhc29uYWxpdHkuDQpgYGB7cn0NCmhpZ2dp
bnNXaWtpcGVkaWFfdHMgPSBoaWdnaW5zV2lraXBlZGlhICU+JQ0KICAgICAgICAgICAgICAgICAgICAgICAgICBhcy50aWJibGUoKQ0KaGlnZ2luc1dpa2lwZWRpYV90cyAlPiUNCiAgdGltZV9kZWNvbXBvc2Uodmlld3MsIG1ldGhvZCA9ICJzdGwiLCAgZnJlcXVlbmN5ID0gImF1dG8iLCB0cmVuZCA9ICJhdXRvIikgJT4lICANCiAgYW5vbWFsaXplKHJlbWFpbmRlciwgbWV0aG9kID0gImdlc2QiLCBhbHBoYSA9IDAuMDUsIG1heF9hbm9tcyA9IDAuMSkgJT4lIA0KICBwbG90X2Fub21hbHlfZGVjb21wb3NpdGlvbigpDQpgYGANCg0KDQoNCkRlY29tcG9zZSBkYXRhIHVzaW5nIHRpbWVfZGVjb21wb3NlKCkgZnVuY3Rpb24gaW4gYW5vbWFsaXplIHBhY2thZ2UuIFdlIHdpbGwgdXNlIHN0bCBtZXRob2Qgd2hpY2ggZXh0cmFjdHMgc2Vhc29uYWxpdHkuDQpgYGB7cn0NCmhpZ2dpbnNXaWtpcGVkaWFfdHMgJT4lIA0KICB0aW1lX2RlY29tcG9zZSh2aWV3cywgbWV0aG9kID0gInN0bCIsIGZyZXF1ZW5jeSA9ICJhdXRvIiwgdHJlbmQgPSAiYXV0byIpICU+JSAgDQogIGFub21hbGl6ZShyZW1haW5kZXIsIG1ldGhvZCA9ICJnZXNkIiwgYWxwaGEgPSAwLjA1LCBtYXhfYW5vbXMgPSAwLjEpICU+JSANCiAgcGxvdF9hbm9tYWx5X2RlY29tcG9zaXRpb24oKQ0KYGBgDQoNClBsb3QgdGhlIGRhdGEgYWdhaW4gYnkgcmVjb21wb3NpbmcgZGF0YQ0KYGBge3J9DQpoaWdnaW5zV2lraXBlZGlhX3RzICU+JSANCiAgdGltZV9kZWNvbXBvc2Uodmlld3MpICU+JSANCiAgYW5vbWFsaXplKHJlbWFpbmRlcikgJT4lIA0KICB0aW1lX3JlY29tcG9zZSgpICU+JSAgDQogIHBsb3RfYW5vbWFsaWVzKHRpbWVfcmVjb21wb3NlZCA9IFRSVUUsIG5jb2wgPSAzLCBhbHBoYV9kb3RzID0gMC41KQ0KYGBgDQoNCkV4dHJhY3QgdGhlIGFub21hbGllcw0KYGBge3J9DQphbm9tYWxpemVIaWdnaW5zID0gaGlnZ2luc1dpa2lwZWRpYV90cyAlPiUgDQogIHRpbWVfZGVjb21wb3NlKHZpZXdzKSAlPiUgIA0KICBhbm9tYWxpemUocmVtYWluZGVyKSAlPiUgIA0KICB0aW1lX3JlY29tcG9zZSgpICU+JSAgDQogIGZpbHRlcihhbm9tYWx5ID09ICdZZXMnKQ0KYW5vbWFsaXplSGlnZ2lucw0KYGBgDQoNCg0KDQo=