library(ggplot2)
library(lubridate)
library(forecast)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: timeDate
## This is forecast 6.1
library(scales)

Thomas asked when the SO [r] tag will reach 100.000 questions. Any bets? Data queried from this site.

# Load the per-day question counts and drop the `r` column.
qs <- read.table("number_r_qs_per_day.csv", header = TRUE, sep = ",")
qs[["r"]] <- NULL

# Assemble a Date from the Year/Month/Day columns and sort chronologically.
qs$Datum <- as.Date(
  paste(qs[["Year"]], qs[["Month"]], qs[["Day"]], sep = "-"),
  format = "%Y-%m-%d"
)
qs <- qs[order(qs[["Datum"]]), ]

# Day of week as reported by POSIXlt (0 = Sunday, ..., 6 = Saturday).
qs$Weekday <- as.POSIXlt(qs[["Datum"]])$wday

# Running total of questions; relies on the rows being in date order.
qs$CumSum <- cumsum(qs[["Total"]])

# Label each day: Weekday 0 and 6 are "Weekend", 1 through 5 are
# "Non-Weekend"; anything else stays NA.
qs$Workday <- ifelse(
  qs$Weekday %in% c(0, 6),
  "Weekend",
  ifelse(qs$Weekday %in% 1:5, "Non-Weekend", NA)
)

# Daily question counts, coloured by weekend / non-weekend.
ggplot(data = qs, mapping = aes(Datum, Total, colour = Workday)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  theme_bw()

Cumulative number of questions per day.

# Running total of questions over time.
ggplot(data = qs, mapping = aes(Datum, CumSum)) +
  geom_line() +
  theme_bw()

Predict when the tag will reach 100.000 questions (I used this Q to build the forecast).

# Wrap the cumulative counts in a ts object with one observation per time unit.
# NOTE(review): the start value combines the year 2011 with the day-of-year of
# 2008-09-16, so the resulting time index is an arbitrary offset rather than a
# calendar date. It is left unchanged because later plotting code uses x limits
# expressed on this index -- confirm before changing.
qs.ts <- ts(
  data = qs$CumSum,
  start = c(2011, yday("2008-09-16")),
  frequency = 1
)

# Fit an exponential smoothing model and forecast 60 steps ahead.
qs.mdl <- forecast(object = ets(y = qs.ts), h = 60)

# Forecast plot with a horizontal reference line at 100000 questions.
plot(x = qs.mdl)
abline(h = 100000)

# Point forecasts, one row per step after the last observation.
data.frame(after_22_6_2015 = qs.mdl$mean)
##    after_22_6_2015
## 1         95888.12
## 2         96011.24
## 3         96134.35
## 4         96257.46
## 5         96380.58
## 6         96503.69
## 7         96626.80
## 8         96749.92
## 9         96873.03
## 10        96996.14
## 11        97119.25
## 12        97242.37
## 13        97365.48
## 14        97488.59
## 15        97611.71
## 16        97734.82
## 17        97857.93
## 18        97981.05
## 19        98104.16
## 20        98227.27
## 21        98350.38
## 22        98473.50
## 23        98596.61
## 24        98719.72
## 25        98842.84
## 26        98965.95
## 27        99089.06
## 28        99212.18
## 29        99335.29
## 30        99458.40
## 31        99581.51
## 32        99704.63
## 33        99827.74
## 34        99950.85
## 35       100073.97
## 36       100197.08
## 37       100320.19
## 38       100443.30
## 39       100566.42
## 40       100689.53
## 41       100812.64
## 42       100935.76
## 43       101058.87
## 44       101181.98
## 45       101305.10
## 46       101428.21
## 47       101551.32
## 48       101674.43
## 49       101797.55
## 50       101920.66
## 51       102043.77
## 52       102166.89
## 53       102290.00
## 54       102413.11
## 55       102536.23
## 56       102659.34
## 57       102782.45
## 58       102905.56
## 59       103028.68
## 60       103151.79

Based on this model, the SO [r] tag will hit 100.000 questions in 34 or 35 days. In human-readable format, that means on 2015-07-26 or 2015-07-27.

# Zoom in on the end of the observed series and the forecast horizon.
x_window <- c(4000, 4600)
plot(qs.mdl, xlim = x_window, axes = FALSE)
abline(h = 100000)

# The plot's x coordinates are the time index of qs.ts, not Date values, so
# ticks placed at the numeric value of qs$Datum fall outside the plotting
# region and never show up. Put the ticks on the ts time index instead and
# label them with the matching calendar dates.
# NOTE(review): the dates for the forecast steps assume one step per calendar
# day after the last observed date -- confirm against the input data.
tick_at <- c(as.numeric(time(qs.ts)), as.numeric(time(qs.mdl$mean)))
tick_dates <- c(qs$Datum, max(qs$Datum) + seq_along(qs.mdl$mean))
visible <- which(tick_at >= x_window[1] & tick_at <= x_window[2])
if (length(visible) > 0) {
  # Keep at most seven evenly spaced ticks so the date labels stay readable.
  shown <- visible[unique(round(seq(1, length(visible), length.out = 7)))]
  axis(side = 1, at = tick_at[shown], labels = format(tick_dates[shown]))
}
axis(side = 2)