library(tidyverse)
library(pageviews) # This package gets data on Wikipedia viewing
library(DT) # DT stands for datatable, and creates interactive tables
library(infer) # for some stats like t_test
library(devtools)
These are the packages that I used for this assignment.
Question 1.
# Page views of the "Mass shootings in the United States" article,
# requested for 2017-01-01 through 2023-12-31.
Shootings <- article_pageviews(
  article = "Mass shootings in the United States",
  start = as.Date("2017-01-01"),
  end = as.Date("2023-12-31")
)

# The ten rows with the largest `views` values.
Shootings |>
  slice_max(views, n = 10)

# Line chart of views against date.
Shootings |>
  ggplot(aes(x = date, y = views)) +
  geom_line()
This is the graph for the views of the article Mass shootings in the United States over time.
# Page views of the "Gun control" article, requested for 2017-1-1 through
# 2023-12-31.
gun_control <- article_pageviews(
  article = "Gun control",
  start = as.Date("2017-1-1"),
  end = as.Date("2023-12-31")
)

# Rows holding the ten largest `views` values.
slice_max(gun_control, views, n = 10)

# Views drawn against date as a line.
ggplot(gun_control, aes(x = date, y = views)) +
  geom_line()
This is the graph for the views of the article Gun Control over time.
# Stack the two articles' page-view tables into one long table.
guns <- bind_rows(gun_control, Shootings)

# One line per article, told apart by colour.
ggplot(guns, aes(x = date, y = views, color = article)) +
  geom_line()
This is a graph with the views of the articles Gun control and Mass shootings in the United States together. Both articles show spikes in views at similar times, which suggests that when people were looking up mass shootings in the United States, they also looked up gun control.
Question 2.
# Reshape to one column of views per article. The wide table is computed once
# and reused for both the printed table and the scatterplot.
guns_wide <- guns |>
  pivot_wider(names_from = article, values_from = views)

guns_wide

guns_wide |>
  ggplot(aes(x = Mass_shootings_in_the_United_States, y = Gun_control)) + # create scatterplot
  geom_point() +
  # create regression line; the formula is spelled out so the fitted model is
  # explicit rather than left to the default
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    x = "Views of the Wikipedia Mass shootings in the US article",
    y = "Views of the Wikipedia Gun control article",
    title = "Relationship between Wikipedia article views"
  )
This is the scatter plot showing the views of the two articles.
Question 3.
# Top-articles listing requested for 2019-08-04.
top <- top_articles(start = as.Date("2019-08-04"))

# Interactive table of article names and views, leaving out the two
# non-article entries ("Main_Page" and "Special:Search").
top |>
  select(article, views) |>
  filter(!article %in% c("Main_Page", "Special:Search")) |>
  datatable()
This is a table showing the most-viewed Wikipedia articles on August 4, 2019, the day after the mass shooting in El Paso, Texas.
# Horizontal bar chart of the ten most-viewed entries in `top`, leaving out
# "Main_Page" and "Special:Search".
top |>
  select(article, views) |>
  filter(!article %in% c("Main_Page", "Special:Search")) |>
  # slice_max() replaces the superseded top_n(); it returns rows sorted by
  # views (largest first), so the factor levels below follow view count.
  slice_max(views, n = 10) |>
  # as_factor() keeps order of appearance; reversing it puts the most-viewed
  # article at the top once the axes are flipped.
  ggplot(aes(x = fct_rev(as_factor(article)), y = views)) +
  geom_col(fill = "green") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    y = "Number of Views",
    x = "Article",
    title = "Top Wikipedia articles, Aug. 4, 2019"
  )
This is a bar graph showing the ten most-viewed Wikipedia articles on August 4, 2019, the day after the El Paso shooting.
# Top-articles listing requested for 2019-08-05.
tops <- top_articles(start = as.Date("2019-08-05"))

# Interactive table of article names and views, leaving out the two
# non-article entries ("Main_Page" and "Special:Search").
tops |>
  select(article, views) |>
  filter(!article %in% c("Main_Page", "Special:Search")) |>
  datatable()
This is a table showing the most-viewed Wikipedia articles on August 5, 2019, the day after the mass shooting in Dayton, Ohio.
# Horizontal bar chart of the ten most-viewed entries in `tops`, leaving out
# "Main_Page" and "Special:Search".
tops |>
  select(article, views) |>
  filter(!article %in% c("Main_Page", "Special:Search")) |>
  # slice_max() replaces the superseded top_n(); it returns rows sorted by
  # views (largest first), so the factor levels below follow view count.
  slice_max(views, n = 10) |>
  # as_factor() keeps order of appearance; reversing it puts the most-viewed
  # article at the top once the axes are flipped.
  ggplot(aes(x = fct_rev(as_factor(article)), y = views)) +
  geom_col(fill = "green") +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    y = "Number of Views",
    x = "Article",
    title = "Top Wikipedia articles, Aug. 5, 2019"
  )
This is a bar graph showing the ten most-viewed Wikipedia articles on August 5, 2019, the day after the Dayton, Ohio shooting.
Question 4.
# "Gun control" page views for 2019-07-27 through 2019-08-10.
gun_controler <- article_pageviews(
  article = "Gun control",
  start = as.Date("2019-07-27"),
  end = as.Date("2019-08-10")
)
# Inspect the table just fetched (not the full-range `gun_control` table).
glimpse(gun_controler)
Rows: 15
Columns: 8
$ project <chr> "wikipedia", "wikipedia", "wikipedia", "wikipedia", "wikip…
$ language <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en"…
$ article <chr> "Gun_control", "Gun_control", "Gun_control", "Gun_control"…
$ access <chr> "all-access", "all-access", "all-access", "all-access", "a…
$ agent <chr> "all-agents", "all-agents", "all-agents", "all-agents", "a…
$ granularity <chr> "daily", "daily", "daily", "daily", "daily", "daily", "dai…
$ date <dttm> 2019-07-27, 2019-07-28, 2019-07-29, 2019-07-30, 2019-07-3…
$ views <dbl> 340, 325, 677, 556, 878, 466, 323, 614, 3238, 3434, 2748, …
# Tag each row with its offset from the middle of the window (-7 through 7)
# and with an event label used when the two events are combined.
gun_controler <- gun_controler |>
  mutate(
    day = -7:7,
    event = "gun_controler"
  )

# Views by day offset.
ggplot(gun_controler, aes(x = day, y = views)) +
  geom_line()
This is a graph showing the views of the gun control article a week before and after the shooting in El Paso, Texas.
# "Gun control" page views for 2019-12-20 through 2020-01-03.
kennesaw <- article_pageviews(
  article = "Gun control",
  start = as.Date("2019-12-20"),
  end = as.Date("2020-01-03")
)
# Inspect the table just fetched (not the full-range `gun_control` table).
glimpse(kennesaw)
Rows: 2,556
Columns: 8
$ project <chr> "wikipedia", "wikipedia", "wikipedia", "wikipedia", "wikip…
$ language <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en"…
$ article <chr> "Gun_control", "Gun_control", "Gun_control", "Gun_control"…
$ access <chr> "all-access", "all-access", "all-access", "all-access", "a…
$ agent <chr> "all-agents", "all-agents", "all-agents", "all-agents", "a…
$ granularity <chr> "daily", "daily", "daily", "daily", "daily", "daily", "dai…
$ date <dttm> 2017-01-01, 2017-01-02, 2017-01-03, 2017-01-04, 2017-01-0…
$ views <dbl> 285, 393, 434, 544, 635, 620, 431, 464, 610, 595, 686, 688…
# Tag each row with its offset from the middle of the window (-7 through 7)
# and with an event label used when the two events are combined.
kennesaw <- kennesaw |>
  mutate(
    day = -7:7,
    event = "kennesaw"
  )

# Views by day offset.
ggplot(kennesaw, aes(x = day, y = views)) +
  geom_line()
This is a graph showing the views of the gun control article a week before and after a shooting in Kennesaw, Georgia, in 2019.
# Stack the two event windows so they can share one set of axes.
gunsfour <- bind_rows(gun_controler, kennesaw)

# One line per event, aligned on the day offset.
ggplot(gunsfour, aes(x = day, y = views, color = event)) +
  geom_line() +
  theme_minimal() +
  labs(
    x = "Days before/after Shooting",
    y = "Wikipedia Views",
    color = "Event"
  )
This graph shows the views of the gun control article a week before and after the shootings in Texas and Georgia. The graph shows that there were more views of gun control during the time of the Texas shooting.
Question 5.
# Compare views after the event with views up to the event.
# Note: `day > 0` is FALSE for day 0, so the event day itself is grouped with
# the "before" observations.
gunsfour |>
  mutate(after_event = day > 0) |>
  # State the subtraction order explicitly (TRUE minus FALSE, i.e. after minus
  # before) instead of relying on the default, which emits a warning.
  t_test(views ~ after_event, order = c("TRUE", "FALSE"))
Warning: The statistic is based on a difference or ratio; by default, for
difference-based statistics, the explanatory variable is subtracted in the
order "TRUE" - "FALSE", or divided in the order "TRUE" / "FALSE" for
ratio-based statistics. To specify this order yourself, supply `order =
c("TRUE", "FALSE")`.
# Mean, standard deviation and row count of views for each side of the
# `day > 0` split.
gunsfour |>
  mutate(after_event = day > 0) |>
  group_by(after_event) |>
  summarise(
    Mean = mean(views),
    StdDev = sd(views),
    N = n()
  )
The average number of views of the Gun Control article on the days up to and including the day of each shooting (M = 521, SD = 200.80) was statistically significantly different from the average number of views in the seven days after the shootings (M = 1360, SD = 1067.47), t(13.81) = 2.90, p = .01.