library(httr)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Shared state used by getRevDates() / getRevisionHistory():
# `rvc` holds the `rvcontinue` token of the most recent API response and
# `url` holds the API endpoint; both are overwritten by getRevisionHistory().
rvc <- ""
url <- ""

# Fetch one batch of revision timestamps for a wiki article.
#
# Sends an `action=query`, `prop=revisions` request (rvlimit = 500) to the
# endpoint stored in the global variable `url`.
#
# Args:
#   title: Article title passed as the `titles` query parameter.
#   rvid:  Continuation token (`rvcontinue`) from the previous response.
#          Pass "" (or NULL) to request the first batch.
#
# Returns:
#   The `timestamp` fields of the returned revisions as an unparsed character
#   vector, or NULL when the response contains no revisions.
#
# Side effects:
#   Overwrites the global `rvc` with `continue$rvcontinue` from the response;
#   it becomes NULL when the response carries no continuation token.
getRevDates <- function(title, rvid) {
  pl <- list(
    action = "query",
    prop = "revisions",
    titles = title,
    format = "json",
    rvlimit = "500"
  )
  # Only send `rvcontinue` when a usable token exists. The length/NA guards
  # keep a NULL or empty token from raising "argument is of length zero".
  if (length(rvid) == 1 && !is.na(rvid) && nzchar(rvid)) {
    pl <- c(pl, rvcontinue = rvid)
  }
  r <- POST(url, body = pl)
  # Fail loudly on an HTTP error instead of letting a failed request look like
  # "no more revisions" to the caller.
  stop_for_status(r)
  res <- content(r)
  # Guard the `[[1]]` lookup: a response without pages yields no revisions.
  pages <- res$query$pages
  rev <- if (length(pages) > 0) pages[[1]]$revisions else NULL
  date <- unlist(lapply(rev, function(x) x$timestamp))
  rvc <<- res$continue$rvcontinue
  date
}

# Download every revision timestamp of one Wikipedia article.
#
# Points the global `url` at the API of the requested language edition,
# resets the global continuation token `rvc`, and then calls getRevDates()
# until `rvc` becomes NULL.
#
# Args:
#   title: Article title, e.g. "Wonder_Girls".
#   lang:  Language code used as the Wikipedia subdomain, e.g. "en".
#
# Returns:
#   A data frame with one row per revision and the columns
#   `date_posixct` (POSIXct, parsed as UTC), `date` (Date) and `title`.
getRevisionHistory <- function(title, lang) {
  # getRevDates() reads `url` and updates `rvc` in the enclosing (global)
  # environment, so both are (re)initialised there before the first request.
  rvc <<- ""
  url <<- paste0("https://", lang, ".wikipedia.org/w/api.php")

  # Collect each batch in a list and flatten once at the end, rather than
  # re-copying an ever-growing vector on every iteration.
  batches <- list()
  # Repeat until the response no longer carries res$continue$rvcontinue.
  while (!is.null(rvc)) {
    batches[[length(batches) + 1L]] <- getRevDates(title, rvc)
  }
  results <- as.character(unlist(batches))

  date_posixct <- as.POSIXct(
    strptime(results, "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
  )
  # Build the frame with an explicit column name. The previous
  # as.data.frame(date_posixct, date) passed the base function `date` as
  # `row.names`, which triggered a "Will be an error!" warning on every call.
  rt <- data.frame(date_posixct = date_posixct)
  rt %>%
    mutate(date = as.Date(date_posixct)) %>%
    mutate(title = title)
}

# titles <- c('Seoul', 'Tokyo', 'Beijing', 'Shanghai', 'Hong_Kong', 'Singapore', 'Taipei')
# Article titles whose revision histories are downloaded and compared.
titles <- c(
  "Girls'_Generation",
  "Kara_(South_Korean_band)",
  "Wonder_Girls",
  "2NE1",
  "F(x)_(band)",
  "Secret_(South_Korean_band)",
  "Sistar",
  "Miss_A",
  "Girl's_Day",
  "AOA_(band)",
  "EXID",
  "Apink",
  "GFriend",
  "Red_Velvet_(band)",
  "Mamamoo",
  "Twice_(band)",
  "Lovelyz"
)

# titles <- c('FC_Barcelona', 'Real_Madrid_C.F.', 'Atlético_Madrid', 'FC_Bayern Munich', 
#             'Juventus_F.C.', 'Manchester_United_F.C.', 'Arsenal_F.C.', 'Manchester_City_F.C.', 'Chelsea F.C.', 'Liverpool_F.C.',
#             'Borussia_Dortmund', 'A.C._Milan', 'Tottenham_Hotspur_F.C.')

# Fetch the English-Wikipedia revision history of every entry in `titles`,
# printing each title as progress output. `datalist` ends up holding one data
# frame per title, in the same order as `titles`.
datalist <- vector("list", length(titles))
for (i in seq_along(titles)) {
  title <- titles[[i]]
  print(title)
  datalist[[i]] <- getRevisionHistory(title, "en")
}
## [1] "Girls'_Generation"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 13666 -- omitting it. Will be an error!
## [1] "Kara_(South_Korean_band)"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 4349 -- omitting it. Will be an error!
## [1] "Wonder_Girls"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 4271 -- omitting it. Will be an error!
## [1] "2NE1"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 6341 -- omitting it. Will be an error!
## [1] "F(x)_(band)"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 4775 -- omitting it. Will be an error!
## [1] "Secret_(South_Korean_band)"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1239 -- omitting it. Will be an error!
## [1] "Sistar"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 2194 -- omitting it. Will be an error!
## [1] "Miss_A"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 2438 -- omitting it. Will be an error!
## [1] "Girl's_Day"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1651 -- omitting it. Will be an error!
## [1] "AOA_(band)"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1384 -- omitting it. Will be an error!
## [1] "EXID"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1169 -- omitting it. Will be an error!
## [1] "Apink"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 3285 -- omitting it. Will be an error!
## [1] "GFriend"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1903 -- omitting it. Will be an error!
## [1] "Red_Velvet_(band)"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 2374 -- omitting it. Will be an error!
## [1] "Mamamoo"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1132 -- omitting it. Will be an error!
## [1] "Twice_(band)"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 2382 -- omitting it. Will be an error!
## [1] "Lovelyz"
## Warning in as.data.frame.POSIXct(date_posixct, date): 'row.names' is not a
## character vector of length 1472 -- omitting it. Will be an error!
# Stack the per-article revision tables into one data frame.
rt <- bind_rows(datalist)

# horizontal chart
# Height of one band: monthly counts are split into whole bands of `bandh`
# revisions (`level`) plus the leftover (`rem`).
bandh <- 200
monthly <- group_by(rt, p = cut(date_posixct, "1 month"), title)
monthly <- summarise(monthly, count = n())
rtg <- mutate(
  monthly,
  level = count %/% bandh,
  rem = count %% bandh,
  date = as.Date(p)
)

max(rtg$count)
## [1] 708
max(rtg$level)
## [1] 3
# Opacity increment per band: one of (highest level + 1) equal steps.
alphaStep <- 1 / (max(rtg$level) + 1)

# One facet row per article, months on the x axis.
ggplot(data = rtg, mapping = aes(x = date)) +
  # Full-height bar for every month; its opacity is mapped to `level`.
  geom_bar(
    mapping = aes(y = bandh, alpha = level),
    stat = "identity",
    fill = "darkred"
  ) +
  # Leftover count (`rem`) drawn on top at a fixed opacity of one step.
  geom_bar(
    mapping = aes(y = rem),
    stat = "identity",
    fill = "darkred",
    alpha = min(1, alphaStep)
  ) +
  scale_alpha_continuous(range = c(alphaStep, 1)) +
  scale_x_date(date_breaks = "3 month", date_labels = "%Y-%m") +
  labs(y = NULL) +
  facet_grid(title ~ .) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 60, hjust = 1),
    axis.text.y = element_blank(),
    strip.text.y = element_text(angle = 0),
    strip.background = element_rect(fill = "white")
  )

# # histogram
# ggplot(rt, aes(x=date)) + 
#   geom_histogram(binwidth=10, alpha = 6/10, aes(y=..count..), fill="purple") +
#   scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
#   # annotate("text", x = as.Date("2015-01-15"), y = 80, label = "Glass Bead") + # 유리구슬
#   # annotate("text", x = as.Date("2015-07-23"), y = 50, label = "Me gustas tu") + # 오늘부터 우리는
#   # annotate("text", x = as.Date("2016-01-25"), y = 180, label = "Rough") + # 시간을 달려서
#   # annotate("text", x = as.Date("2016-07-11"), y = 100, label = "NAVILLERA") + #너 그리고 나
#   labs(title = "Number of revisions in the Wikipedia page") +
#   ylab("Count in 10 days") +
#   facet_grid(title ~.) +
#   theme(axis.text.x = element_text(angle = 60, hjust = 1))