library(ggplot2)
library(lubridate)
library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: timeDate
## This is forecast 6.1
library(scales)
Thomas asked when the SO [r] tag will reach 100.000 questions. Any bets? Data queried from this site.
# Read the per-day question counts; the columns used below are Year, Month,
# Day and Total. The column named r is not needed and is dropped.
qs <- read.table(file = "number_r_qs_per_day.csv", sep = ",", header = TRUE)
qs[["r"]] <- NULL

# Assemble a Date from the three calendar columns and derive the day of the
# week as an integer (0 = Sunday, ..., 6 = Saturday).
qs$Datum <- as.Date(
  paste(qs$Year, qs$Month, qs$Day, sep = "-"),
  format = "%Y-%m-%d"
)
qs$Weekday <- as.integer(format(qs$Datum, "%w"))

# The running total only makes sense in chronological order, so sort first.
qs <- qs[order(qs$Datum), ]
qs$CumSum <- cumsum(qs$Total)

# Label every day as weekend or not; rows whose weekday is unknown stay NA.
is.weekend <- qs$Weekday %in% c(0, 6)
is.weekday <- qs$Weekday %in% 1:5
qs$Workday <- NA
qs$Workday[is.weekend] <- "Weekend"
qs$Workday[is.weekday] <- "Non-Weekend"
# Scatter plot of the number of questions per day, coloured by whether the
# day falls on a weekend.
ggplot(data = qs, mapping = aes(x = Datum, y = Total, colour = Workday)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  theme_bw()
Cumulative number of questions per day.
# Line plot of the running total of questions against the date.
ggplot(data = qs, mapping = aes(x = Datum, y = CumSum)) +
  geom_line() +
  theme_bw()
Predict when the tag will reach 100.000 questions (used this Q to forecast).
# Wrap the cumulative counts in a ts object with one observation per time
# unit (frequency = 1).
# NOTE(review): the start value combines the year 2011 with the day-of-year
# of 2008-09-16; with frequency = 1 this only fixes the origin of the time
# index. The zoomed plot further down hard-codes xlim against this index, so
# confirm both places before changing it.
qs.ts <- ts(
  data = qs$CumSum,
  start = c(2011, yday("2008-09-16")),
  frequency = 1
)

# Fit an ETS model to the series and forecast the next 60 steps.
qs.ets <- ets(qs.ts)
qs.mdl <- forecast(qs.ets, h = 60)

# Plot the forecast and mark the 100000-question target.
plot(qs.mdl)
abline(h = 100000)

# Print the point forecasts, one row per step ahead.
data.frame(after_22_6_2015 = qs.mdl$mean)
## after_22_6_2015
## 1 95888.12
## 2 96011.24
## 3 96134.35
## 4 96257.46
## 5 96380.58
## 6 96503.69
## 7 96626.80
## 8 96749.92
## 9 96873.03
## 10 96996.14
## 11 97119.25
## 12 97242.37
## 13 97365.48
## 14 97488.59
## 15 97611.71
## 16 97734.82
## 17 97857.93
## 18 97981.05
## 19 98104.16
## 20 98227.27
## 21 98350.38
## 22 98473.50
## 23 98596.61
## 24 98719.72
## 25 98842.84
## 26 98965.95
## 27 99089.06
## 28 99212.18
## 29 99335.29
## 30 99458.40
## 31 99581.51
## 32 99704.63
## 33 99827.74
## 34 99950.85
## 35 100073.97
## 36 100197.08
## 37 100320.19
## 38 100443.30
## 39 100566.42
## 40 100689.53
## 41 100812.64
## 42 100935.76
## 43 101058.87
## 44 101181.98
## 45 101305.10
## 46 101428.21
## 47 101551.32
## 48 101674.43
## 49 101797.55
## 50 101920.66
## 51 102043.77
## 52 102166.89
## 53 102290.00
## 54 102413.11
## 55 102536.23
## 56 102659.34
## 57 102782.45
## 58 102905.56
## 59 103028.68
## 60 103151.79
Based on this model, the SO [r] tag will hit 100.000 questions in 34 or 35 days. In human-readable format, that means on 2015-07-26 or 2015-07-27.
# Zoomed view of the forecast with calendar dates on the x axis.
zoom.xlim <- c(4000, 4600)
plot(qs.mdl, xlim = zoom.xlim, axes = FALSE)
abline(h = 100000)

# The x coordinates of this plot are the time index of qs.ts, not Date
# values. Passing qs$Datum as tick positions puts every tick far outside
# xlim, leaving the axis empty. Ticks are therefore placed at time-index
# positions and only labelled with the matching dates.
# Forecast steps are labelled on the assumption that each step is one
# calendar day after the last observed date.
tick.pos <- c(as.numeric(time(qs.ts)), as.numeric(time(qs.mdl$mean)))
tick.date <- c(qs$Datum, max(qs$Datum) + seq_along(qs.mdl$mean))

# Keep only positions inside the zoom window, thinned to every 50th point so
# the axis stays readable.
visible <- which(tick.pos >= zoom.xlim[1] & tick.pos <= zoom.xlim[2])
visible <- visible[seq_along(visible) %% 50 == 1]

if (length(visible) > 0) {
  axis(side = 1, at = tick.pos[visible], labels = format(tick.date[visible]))
} else {
  # Nothing falls inside the window; fall back to the default numeric axis.
  axis(side = 1)
}
axis(side = 2)