library(lubridate)
library(tidyverse)
# Load the review spreadsheet, parse `date` with ymd(), and add `roundDat`,
# the parsed date rounded to month units via round_date().
dt <- readxl::read_excel('reviews.xlsx') %>%
  mutate(
    date = ymd(date),
    roundDat = round_date(date, 'month')
  )
Total reviews:
# Row count of dt, reported as the total number of reviews.
nrow(dt)
## [1] 2473
# Line chart of the mean rating over time.
# Review dates are rounded to 4-month units with round_date(), and
# stat_summary() averages `rating` at each rounded date. The y axis is
# limited to 0-5.
ggplot(data = dt, aes(x = round_date(date, "4 months"), y = rating)) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "line") +
  ylim(0, 5) +
  ggtitle("NINA: Mean score (4-months)") +
  # Tick spacing is given through the documented `date_breaks` argument
  # instead of passing a width string to `breaks`.
  scale_x_date(name = "date", date_labels = "%b %y", date_breaks = "8 months")
# Histogram of review dates split into 120 bins, showing how many reviews
# were posted over time.
ggplot(data = dt) +
  geom_histogram(aes(x = date), bins = 120) +
  # Tick spacing is given through the documented `date_breaks` argument
  # instead of passing a width string to `breaks`.
  scale_x_date(date_labels = "%b %y", date_breaks = "8 months") +
  # Rotate the x tick labels by 45 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Number of reviews by date")
We can also extract the reviews of people who reviewed more than once (between 2 and 5 reviews under the same name):
# Reviews written under a name that occurs between 2 and 5 times in dt.
# Names are counted, restricted to that range, ordered by descending count,
# and joined back to dt so each row is one review plus its name's count `n`.
dt_double <- dt %>%
  count(names) %>%
  filter(n >= 2, n <= 5) %>%
  arrange(desc(n)) %>%
  left_join(dt, by = 'names')

# Print the result.
dt_double
## # A tibble: 25 x 7
## names n date review helpfulVotes rating roundDat
## <chr> <int> <date> <chr> <chr> <dbl> <date>
## 1 Michael… 3 2016-07-27 Suche Möglichk… 0 4 2016-08-01
## 2 Michael… 3 2016-07-22 Kostet die Kom… 28 1 2016-08-01
## 3 Michael… 3 2016-01-22 Eigentlich ein… 3 2 2016-02-01
## 4 B. S. 2 2017-02-05 Die App unters… 0 1 2017-02-01
## 5 B. S. 2 2016-07-22 Keine Verbindu… 1 1 2016-08-01
## 6 Christi… 2 2018-01-03 Warnt viel zu … 3 1 2018-01-01
## 7 Christi… 2 2015-09-20 Testwarnung ha… 0 1 2015-10-01
## 8 Christi… 2 2017-07-23 Grösste Gewitt… 1 1 2017-08-01
## 9 Christi… 2 2016-07-23 Pushnachrichte… 0 2 2016-08-01
## 10 Denis S… 2 2019-05-12 Top. Informier… 2 5 2019-05-01
## # … with 15 more rows