Gun Control and Mass Shootings

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pageviews)        # This package gets data on Wikipedia viewing
library(DT)               # DT stands for datatable, and creates interactive tables
library(infer)            # for some stats like t_test
library(devtools)
Loading required package: usethis
# Daily page views of the English Wikipedia "Gun control" article, 2017-2023.
gun_control <- article_pageviews(
  article = "Gun control",
  start = as.Date("2017-1-1"),
  end = as.Date("2023-12-31")
)

# Quick structural check of what the API returned.
glimpse(gun_control)
Rows: 2,556
Columns: 8
$ project     <chr> "wikipedia", "wikipedia", "wikipedia", "wikipedia", "wikip…
$ language    <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en"…
$ article     <chr> "Gun_control", "Gun_control", "Gun_control", "Gun_control"…
$ access      <chr> "all-access", "all-access", "all-access", "all-access", "a…
$ agent       <chr> "all-agents", "all-agents", "all-agents", "all-agents", "a…
$ granularity <chr> "daily", "daily", "daily", "daily", "daily", "daily", "dai…
$ date        <dttm> 2017-01-01, 2017-01-02, 2017-01-03, 2017-01-04, 2017-01-0…
$ views       <dbl> 285, 393, 434, 544, 635, 620, 431, 464, 610, 595, 686, 688…
# Daily page views of the English Wikipedia "Gun violence in the United
# States" article over the same 2017-2023 period.
gun_violence <- article_pageviews(
  article = "Gun violence in the United States",
  start = as.Date("2017-1-1"),
  end = as.Date("2023-12-31")
)

# Quick structural check of what the API returned.
glimpse(gun_violence)
Rows: 2,556
Columns: 8
$ project     <chr> "wikipedia", "wikipedia", "wikipedia", "wikipedia", "wikip…
$ language    <chr> "en", "en", "en", "en", "en", "en", "en", "en", "en", "en"…
$ article     <chr> "Gun_violence_in_the_United_States", "Gun_violence_in_the_…
$ access      <chr> "all-access", "all-access", "all-access", "all-access", "a…
$ agent       <chr> "all-agents", "all-agents", "all-agents", "all-agents", "a…
$ granularity <chr> "daily", "daily", "daily", "daily", "daily", "daily", "dai…
$ date        <dttm> 2017-01-01, 2017-01-02, 2017-01-03, 2017-01-04, 2017-01-0…
$ views       <dbl> 877, 1033, 1173, 1186, 1298, 2281, 2275, 1491, 1503, 2460,…
# Stack both articles' view counts into one long table so that a single plot
# can draw one line per article.
guns <- bind_rows(gun_control, gun_violence)

ggplot(guns, aes(x = date, y = views, color = article)) +
  geom_line()

This line graph shows the daily views of the Gun Control and Gun Violence in the United States Wikipedia pages over time, from 2017 through 2023.

# Give each article its own column of view counts so that one article can go
# on each axis, then draw a scatterplot with a linear regression line.
guns %>%
  pivot_wider(values_from = views, names_from = article) %>%
  ggplot(aes(
    x = Gun_violence_in_the_United_States,
    y = Gun_control
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Relationship between Wikipedia article views",
    x = "Views of the Wikipedia Gun violence in the US article",
    y = "Views of the Wikipedia Gun control article"
  )
`geom_smooth()` using formula = 'y ~ x'

This scatterplot shows the relationship between daily views of the Gun Violence in the US Wikipedia page and daily views of the Gun Control Wikipedia page, with a fitted linear regression line.

# Most-viewed Wikipedia articles on 2019-10-19.
top <- top_articles(start = as.Date("2019-10-19"))

# Ten most-viewed articles, leaving out the site's own main and search pages.
# Computed once and shared by the table and the chart so the two always agree.
# slice_max() replaces the superseded top_n(); both keep ties by default.
top_ten <- top %>%
  select(article, views) %>%
  filter(!article %in% c("Main_Page", "Special:Search")) %>%
  slice_max(views, n = 10)

# Interactive table of the top ten.
datatable(top_ten)

# Horizontal bar chart of the top ten. fct_reorder() sorts the bars by views
# so that, after coord_flip(), the most-viewed article is at the top.
top_ten %>%
  ggplot(aes(x = fct_reorder(article, views), y = views)) +
  geom_col(fill = "Orange") +
  coord_flip() +
  labs(
    y = "Number of Views",
    x = "Article",
    title = "Top Wikipedia articles, Oct. 19, 2019"
  )

The bar graph shows that the Grambling State University shooting was not among the most-viewed Wikipedia articles on October 19, 2019, the day after the tragedy, so Wikipedia views did not show increased interest in the event.

# Most-viewed Wikipedia articles on 2019-10-23.
top <- top_articles(start = as.Date("2019-10-23"))

# Ten most-viewed articles, leaving out the site's own main and search pages.
# Computed once and shared by the table and the chart so the two always agree.
# slice_max() replaces the superseded top_n(); both keep ties by default.
top_ten <- top %>%
  select(article, views) %>%
  filter(!article %in% c("Main_Page", "Special:Search")) %>%
  slice_max(views, n = 10)

# Interactive table of the top ten.
datatable(top_ten)

# Horizontal bar chart of the top ten. fct_reorder() sorts the bars by views
# so that, after coord_flip(), the most-viewed article is at the top.
top_ten %>%
  ggplot(aes(x = fct_reorder(article, views), y = views)) +
  geom_col(fill = "Orange") +
  coord_flip() +
  labs(
    y = "Number of Views",
    x = "Article",
    title = "Top Wikipedia articles, Oct. 23, 2019"
  )

The bar graph shows that the Ridgway High School shooting (October 22, 2019) did not become a top-viewed Wikipedia article on October 23, 2019, the day after the incident.

# Page views of the "Gun_control" article for the 14 days on either side of
# each shooting. Each window is 29 days long and centered on the event date
# used in this report, so `day == 0` is the day of the shooting in both:
#   california: 2019-10-22 (window 2019-10-08 .. 2019-11-05)
#   louisiana:  2019-10-18 (window 2019-10-04 .. 2019-11-01)
# The California window previously ran 2019-10-07 .. 2019-11-04, which put
# day 0 on 2019-10-21, one day before the incident.
# NOTE(review): `day = -14:14` assumes the API returns exactly one row per
# calendar day in the window; mutate() will error if any day is missing.
california <- article_pageviews(
  article = "Gun_control",
  start = as.Date("2019-10-8"),
  end = as.Date("2019-11-5")
) %>%
  mutate(day = -14:14, event = "california")

louisiana <- article_pageviews(
  article = "Gun_control",
  start = as.Date("2019-10-4"),
  end = as.Date("2019-11-1")
) %>%
  mutate(day = -14:14, event = "louisiana")
# Views of the Gun control article relative to each shooting, one plot per
# event (x axis is the day offset from the event window's midpoint).
ggplot(louisiana, aes(x = day, y = views)) +
  geom_line()

ggplot(california, aes(x = day, y = views)) +
  geom_line()

# Combine the two event windows so both can be drawn on one set of axes,
# with one colored line per event.
shootings <- bind_rows(louisiana, california)

ggplot(shootings, aes(x = day, y = views, color = event)) +
  geom_line() +
  labs(
    title = "Views of the Wikipedia Gun Control Article before and after Two Mass Shootings",
    x = "Days before/after Shooting",
    y = "Wikipedia Views",
    color = "Event"
  ) +
  theme_minimal()

The graph shows the daily views of the Wikipedia Gun Control article in the 14 days before and after school shootings in California and Louisiana.

# Two-sample t-test: do daily views after a shooting differ from views before?
# `after_event` is TRUE for days 1..14; day 0 (the window midpoint) is counted
# with the "before" group. `order` states the direction of the difference
# explicitly (after minus before), which is the same direction t_test() would
# pick by default but without the warning it emits when `order` is omitted.
shootings %>%
  mutate(after_event = (day > 0)) %>%
  t_test(views ~ after_event, order = c("TRUE", "FALSE"))
Warning: The statistic is based on a difference or ratio; by default, for
difference-based statistics, the explanatory variable is subtracted in the
order "TRUE" - "FALSE", or divided in the order "TRUE" / "FALSE" for
ratio-based statistics. To specify this order yourself, supply `order =
c("TRUE", "FALSE")`.
# A tibble: 1 × 7
  statistic  t_df p_value alternative estimate lower_ci upper_ci
      <dbl> <dbl>   <dbl> <chr>          <dbl>    <dbl>    <dbl>
1     0.696  53.5   0.490 two.sided       35.1    -66.1     136.
# Descriptive statistics of daily views before (FALSE) and after (TRUE) the
# shootings; day 0 is counted with the "before" group.
shootings %>%
  group_by(after_event = day > 0) %>%
  summarise(
    Mean = mean(views),
    StdDev = sd(views),
    N = n()
  )
# A tibble: 2 × 4
  after_event  Mean StdDev     N
  <lgl>       <dbl>  <dbl> <int>
1 FALSE        858.   177.    30
2 TRUE         894.   205.    28

The average number of daily views of the Wikipedia Gun Control article in the 14 days before the two shootings, including the day of each shooting (M = 858, SD = 177, N = 30), was not statistically significantly different from the average number of daily views in the 14 days after the shootings (M = 894, SD = 205, N = 28), t(53.5) = 0.70, p = 0.490.