Student Details

Story URL

https://www.abc15.com/news/data/school-shootings-in-u-s-when-where-each-shooting-has-occurred-in-2018

Visualisation URL

http://rpubs.com/Razzak/393878

Code

library(flexdashboard)
library(ggplot2)
library(dplyr)
library(plotly)
library(reshape2)
library(shiny)
library(readr)
library(lubridate)
library(tidyverse)
library(ggfortify) 
library(tseries) 
library(forecast) 
library(xts)
library(astsa)
library(DT)
library(gapminder)
library(wesanderson)
library(maps)
library(plyr)

# Show the working directory; both CSV files below are read relative to it.
getwd()
## [1] "/Users/mohammadrazzak/Documents/University/RMIT/dataviz/Assignment 3"

## Read data
# Blank cells, "NA" and "-" are all treated as missing values.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 (where the
# default became FALSE), matching the recorded str() output and the later
# levels() call on School.
df <- read.csv(
  "pah_wikp_combo.csv",
  na.strings = c("", "NA", "-"),
  stringsAsFactors = TRUE
)

D2 <- read.csv(
  "cps_01_formatted.csv",
  na.strings = c("", "NA", "-"),
  stringsAsFactors = TRUE
)

# Drop row 39 and rows 56-64 of the enrolment table.
# NOTE(review): presumably a stray row plus trailing non-data rows -- confirm
# against the CSV before reusing these indices with a refreshed file.
D2N <- D2[-c(39, 56:64), ]

#Pre-processing 
# Inspect the structure of the two raw tables (df, D2) and of the trimmed
# enrolment table (D2N). The lines starting with "##" below each call are the
# console output that was recorded when the script was run.
str(df)
## 'data.frame':    656 obs. of  10 variables:
##  $ Date      : Factor w/ 489 levels "1/10/01","1/10/12",..: 297 376 427 439 68 256 340 340 454 454 ...
##  $ City      : Factor w/ 345 levels "Acton","Acushnet",..: 35 52 163 280 270 84 69 69 74 74 ...
##  $ State     : Factor w/ 53 levels "Alabama","Alaska",..: 36 44 32 45 45 45 5 5 45 45 ...
##  $ AreaType  : Factor w/ 3 levels "rural","suburban",..: NA 2 3 NA 3 NA 3 NA 1 NA ...
##  $ School    : Factor w/ 4 levels "C","ES","HS",..: 1 3 3 3 3 3 4 4 3 3 ...
##  $ Fatalities: int  0 1 1 0 0 1 1 1 1 1 ...
##  $ Wounded   : int  1 NA NA 3 NA 0 NA 0 NA 0 ...
##  $ Dupe      : logi  NA NA NA NA NA NA ...
##  $ Source    : Factor w/ 2 levels "Pah","Wikp": 2 1 1 2 1 2 1 2 1 2 ...
##  $ Desc      : Factor w/ 275 levels "10-year-old Jason Osmanson, teased because his parents have AIDS, killed an 11-year-old student on the playgrou"| __truncated__,..: 108 NA NA 61 NA 23 NA 127 NA 37 ...
# Same inspection for the full enrolment table (before rows are dropped).
str(D2)
## 'data.frame':    64 obs. of  18 variables:
##  $ Year          : int  2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 ...
##  $ Total.enrolled: int  77149 77149 77232 77066 77214 77772 78426 79043 78519 77288 ...
##  $ N.Total       : int  4639 4639 4746 4532 4694 4682 4628 4946 4835 4708 ...
##  $ N.Public      : int  2708 2708 2806 2610 2693 2558 2732 2904 2776 2744 ...
##  $ N.Private     : int  1932 1932 1941 1922 2001 2124 1896 2042 2059 1964 ...
##  $ K.Total       : int  4045 4045 4017 4073 4069 4150 4138 4214 4172 4132 ...
##  $ K.Public      : int  3649 3649 3654 3644 3617 3725 3684 3732 3764 3767 ...
##  $ K.Private     : int  396 396 364 428 453 425 454 482 408 365 ...
##  $ E.Total       : int  32715 32715 32604 32826 32622 32873 32683 32872 32663 32238 ...
##  $ E.Public      : int  30076 30076 29978 30173 29805 30171 29865 29965 29841 29365 ...
##  $ E.Private     : int  2640 2640 2627 2653 2817 2702 2818 2907 2822 2874 ...
##  $ H.Total       : int  16602 16602 16668 16535 16654 16601 17047 16613 16574 16445 ...
##  $ H.Public      : int  15344 15344 15330 15358 15379 15468 15704 15426 15338 15269 ...
##  $ H.Private     : int  1258 1258 1338 1177 1275 1133 1343 1187 1236 1177 ...
##  $ C.Total       : int  19149 19149 19196 19101 19175 19467 19930 20397 20275 19764 ...
##  $ C.Public      : int  15073 15073 14971 15175 15325 15514 15778 16134 16153 15722 ...
##  $ C.Private     : int  4076 4076 4225 3926 3850 3953 4152 4263 4122 4042 ...
##  $ C.Full.time   : int  14329 14329 14421 14236 14400 14228 14602 14903 14600 14364 ...
# Compact overview of the trimmed enrolment table (54 of the 64 rows remain).
glimpse(D2N)
## Observations: 54
## Variables: 18
## $ Year           <int> 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011,...
## $ Total.enrolled <int> 77149, 77149, 77232, 77066, 77214, 77772, 78426...
## $ N.Total        <int> 4639, 4639, 4746, 4532, 4694, 4682, 4628, 4946,...
## $ N.Public       <int> 2708, 2708, 2806, 2610, 2693, 2558, 2732, 2904,...
## $ N.Private      <int> 1932, 1932, 1941, 1922, 2001, 2124, 1896, 2042,...
## $ K.Total        <int> 4045, 4045, 4017, 4073, 4069, 4150, 4138, 4214,...
## $ K.Public       <int> 3649, 3649, 3654, 3644, 3617, 3725, 3684, 3732,...
## $ K.Private      <int> 396, 396, 364, 428, 453, 425, 454, 482, 408, 36...
## $ E.Total        <int> 32715, 32715, 32604, 32826, 32622, 32873, 32683...
## $ E.Public       <int> 30076, 30076, 29978, 30173, 29805, 30171, 29865...
## $ E.Private      <int> 2640, 2640, 2627, 2653, 2817, 2702, 2818, 2907,...
## $ H.Total        <int> 16602, 16602, 16668, 16535, 16654, 16601, 17047...
## $ H.Public       <int> 15344, 15344, 15330, 15358, 15379, 15468, 15704...
## $ H.Private      <int> 1258, 1258, 1338, 1177, 1275, 1133, 1343, 1187,...
## $ C.Total        <int> 19149, 19149, 19196, 19101, 19175, 19467, 19930...
## $ C.Public       <int> 15073, 15073, 14971, 15175, 15325, 15514, 15778...
## $ C.Private      <int> 4076, 4076, 4225, 3926, 3850, 3953, 4152, 4263,...
## $ C.Full.time    <int> 14329, 14329, 14421, 14236, 14400, 14228, 14602...
## Day, month, year and weekday
# Parse the month/day/year text in Date, then derive calendar factors from the
# parsed date. Adds the columns new_date, Day, Month, Year and Wday to df.
df <- df %>%
  dplyr::mutate(
    new_date = mdy(Date),
    Day = factor(day(new_date)),
    # Fixing the levels to 1:12 keeps every label attached to its own month
    # even when some month never occurs in the data.
    Month = factor(month(new_date), levels = 1:12, labels = month.abb),
    Year = factor(year(new_date)),
    # week_start = 1 numbers Monday as 1 ... Sunday as 7 so that the labels
    # below line up; wday()'s default numbering starts the week on Sunday,
    # which would shift every label by one day.
    Wday = factor(
      wday(new_date, week_start = 1),
      levels = 1:7,
      labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    )
  )

# Attach the yearly enrolment figures to each shooting record, matching on
# Year. merge() keeps only the years that occur in both tables.
total <- merge(x = df, y = D2N, by = "Year")

## Replace the school-level codes with readable names
total$School <- revalue(
  total$School,
  c(
    "MS" = "Middle School",
    "C" = "College",
    "HS" = "High School",
    "ES" = "Elementary School"
  )
)
levels(total$School)
## [1] "College"           "Elementary School" "High School"      
## [4] "Middle School"
# Map theme: uniform grey canvas with no grid, axes, ticks or legend keys, so
# only the filled polygons, the titles and the legend remain visible.
theme.map <- theme(
  text = element_text(family = "Helvetica Neue", color = "black"),
  # Same grey for panel, plot and legend backgrounds.
  panel.background = element_rect(fill = "#808080"),
  plot.background = element_rect(fill = "#808080"),
  legend.background = element_rect(fill = "#808080"),
  panel.grid = element_blank(),
  # Title sizing.
  plot.title = element_text(size = 15, face = "bold"),
  plot.subtitle = element_text(size = 10),
  legend.key = element_blank(),
  # Longitude/latitude axes carry no information on this map.
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.title = element_blank()
)



## Plot map
# Polygon outlines of the US states; joined below on the lower-cased State.
smap <- map_data("state")

# One row per distinct (State, Fatalities) pair, largest Fatalities first.
# The dplyr verbs are namespaced on purpose: plyr is attached after dplyr at
# the top of the file, so bare count()/arrange()/desc() resolve to plyr's
# versions, and plyr::count() tallies whole rows (every column) instead of
# just these two columns.
state_shoot <- df %>%
  dplyr::count(State, Fatalities) %>%
  dplyr::arrange(dplyr::desc(Fatalities))
state_shoot <- as.data.frame(state_shoot)

# Keep only the map points whose region matches a state present in the data.
# NOTE(review): a state with several Fatalities values contributes several
# rows per map point, highest value first; the map presumably relies on that
# ordering to colour each state by its largest value -- confirm.
foo <- inner_join(
  smap,
  state_shoot %>% dplyr::mutate(State = tolower(State)),
  by = c("region" = "State")
)

# Draw the US state map with polygons filled by one column of `foo`.
#   x: name (character string) of the column of `foo` used for the fill;
#      it is also used as the legend title.
# Returns the ggplot object. Reads the globals smap, foo and theme.map.
state_plot <- function(x) {
  # Assigning into foo here only changes a function-local copy; the chosen
  # column becomes available under the fixed name `x` for aes() below.
  foo$x <- foo[, x]

  state_layer <- geom_polygon(
    data = foo,
    aes(fill = x),
    color = "grey",
    size = 0.05
  )
  fill_scale <- scale_fill_gradientn(
    colors = c(low = "#7AC5CD", high = "#68228B"),
    values = scales::rescale(c(5, 10, 15, 20, 40))
  )

  ggplot(
    data = smap,
    mapping = aes(x = long, y = lat, group = group),
    na.rm = TRUE
  ) +
    state_layer +
    labs(fill = x) +
    fill_scale +
    theme.map
}

# Fatalities map with its title, subtitle and legend label.
state_plot("Fatalities") +
  labs(
    title = "US School Shootings 1990 - 2018",
    subtitle = "The States of Virginia, Connecticut, Florida and Colorado recorded highest fatalities",
    fill = str_c("Fatalities")
  )

## Plot 2
# Bar chart of Fatalities by school level, one colour per month.
# `mode` is a scatter-trace attribute and is therefore not passed here: a
# "bar" trace does not accept it.
p <- plot_ly(
  data = total,
  x = ~School,
  y = ~Fatalities,
  type = "bar",
  color = ~Month,
  colors = "Set1"
) %>%
  layout(title = "Fatalities-Level of School(Monthly)")
p
## Plot 3
# Fatalities against Year, one colour per year.
# Colouring is controlled by `color` (the variable) and `colors` (the
# palette); `col` is not a plot_ly() argument, so it is not passed.
p2 <- plot_ly(
  data = total,
  x = ~Fatalities,
  y = ~Year,
  color = ~Year,
  colors = "Set1"
) %>%
  layout(title = "Yearly Fatalities")
p2
## Plot 4
# Fatalities by day of the week, drawn as a single trace named "Days" with
# titled axes.
p3 <- plot_ly(data = total, x = ~Wday, y = ~Fatalities) %>%
  add_trace(y = ~Fatalities, name = "Days") %>%
  layout(
    title = "Fatalities(Daily)",
    xaxis = list(title = "Days on which the Fatalities took place"),
    yaxis = list(title = "Number of Fatalities")
  )
p3