library(lubridate)
library(tidyverse)
# Load the review data from the Excel workbook, parse `date` into a Date
# and add `roundDat`, the review date rounded to the nearest month.
dt <- readxl::read_excel('reviewsNINA.xlsx') %>%
  mutate(
    date = ymd(date),
    roundDat = round_date(date, 'month')
  )
Total reviews:
# Number of rows in `dt`, reported as the total review count.
nrow(dt)
## [1] 4490
# Line chart of the mean rating over time. Review dates are rounded to
# 4-month units inside aes(), so each point on the line is the mean rating
# of all reviews that share the same rounded date.
ggplot(data = dt, aes(x = round_date(date, '4 months'), y = rating)) +
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
  stat_summary(fun = "mean", geom = "line") +
  # Fix the y axis to the range 0-5.
  ylim(0, 5) +
  ggtitle("NINA: Mean score (4-months)") +
  # `date_breaks` is the documented argument for a break-width string;
  # `breaks` is meant for NULL, a Date vector or a function.
  scale_x_date(name = 'date', date_labels = '%b %y', date_breaks = '8 month')
# Histogram of review dates (120 bins) showing how many reviews were
# posted over time.
ggplot(data = dt) +
  geom_histogram(aes(x = date), bins = 120) +
  # `date_breaks` is the documented argument for a break-width string;
  # `breaks` is meant for NULL, a Date vector or a function.
  scale_x_date(date_labels = '%b %y', date_breaks = '8 month') +
  # Rotate the x-axis labels by 45 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle('Number of reviews by date')
We can also extract the reviews of people who reviewed more than once (between 2 and 5 times):
# Reviews written under a name that appears between 2 and 5 times.
# Names are counted, ordered by descending count, restricted to 2..5
# occurrences, and then joined back to `dt` to recover the full review rows.
dt_double <- dt %>%
  count(names) %>%
  arrange(desc(n)) %>%
  filter(n >= 2, n <= 5) %>%
  left_join(dt, by = 'names')
dt_double
## # A tibble: 57 x 7
## names n date review helpfulVotes rating roundDat
## <chr> <int> <date> <chr> <chr> <dbl> <date>
## 1 Boris … 3 2019-02-10 Bitte bringt di… 2 2 2019-02-01
## 2 Boris … 3 2018-09-06 Rückmeldung aus… 0 1 2018-09-01
## 3 Boris … 3 2017-07-14 Ahhh. Endlich i… 1 5 2017-07-01
## 4 Hans D… 3 2016-08-16 Funktioniert ta… 0 5 2016-08-01
## 5 Hans D… 3 2016-07-23 Katwarn ist das… 0 1 2016-08-01
## 6 Hans D… 3 2015-07-30 Gute und wichti… 0 4 2015-08-01
## 7 Stefan… 3 2017-01-11 Stimmt genau 0 5 2017-01-01
## 8 Stefan… 3 2017-01-04 Macht was sie s… 0 5 2017-01-01
## 9 Stefan… 3 2015-08-18 Katwarn hatte s… 1 1 2015-09-01
## 10 Andreas 2 2018-01-18 Da derzeit zum … 0 4 2018-02-01
## # … with 47 more rows