# Code
library(flexdashboard)
library(ggplot2)
library(dplyr)
library(plotly)
library(reshape2)
library(shiny)
library(readr)
library(lubridate)
library(tidyverse)
library(ggfortify)
library(tseries)
library(forecast)
library(xts)
library(astsa)
library(DT)
library(gapminder)
library(wesanderson)
library(maps)
library(plyr)
# Show the working directory the relative CSV paths are resolved against.
getwd()
## [1] "/Users/mohammadrazzak/Documents/University/RMIT/dataviz/Assignment 3"
## Read data
# Empty cells, the string "NA" and a lone "-" are all read as missing values.
na_tokens <- c("", "NA", "-")
df <- read.csv("pah_wikp_combo.csv", na.strings = na_tokens)
D2 <- read.csv("cps_01_formatted.csv", na.strings = na_tokens)
# Drop row 39 and rows 56 to 64 of the second table before it is used further.
rows_to_drop <- c(39, 56:64)
D2N <- D2[-rows_to_drop, ]
# Pre-processing
# Print the column types and first values of the incident table.
utils::str(df)
## 'data.frame': 656 obs. of 10 variables:
## $ Date : Factor w/ 489 levels "1/10/01","1/10/12",..: 297 376 427 439 68 256 340 340 454 454 ...
## $ City : Factor w/ 345 levels "Acton","Acushnet",..: 35 52 163 280 270 84 69 69 74 74 ...
## $ State : Factor w/ 53 levels "Alabama","Alaska",..: 36 44 32 45 45 45 5 5 45 45 ...
## $ AreaType : Factor w/ 3 levels "rural","suburban",..: NA 2 3 NA 3 NA 3 NA 1 NA ...
## $ School : Factor w/ 4 levels "C","ES","HS",..: 1 3 3 3 3 3 4 4 3 3 ...
## $ Fatalities: int 0 1 1 0 0 1 1 1 1 1 ...
## $ Wounded : int 1 NA NA 3 NA 0 NA 0 NA 0 ...
## $ Dupe : logi NA NA NA NA NA NA ...
## $ Source : Factor w/ 2 levels "Pah","Wikp": 2 1 1 2 1 2 1 2 1 2 ...
## $ Desc : Factor w/ 275 levels "10-year-old Jason Osmanson, teased because his parents have AIDS, killed an 11-year-old student on the playgrou"| __truncated__,..: 108 NA NA 61 NA 23 NA 127 NA 37 ...
# Print the column types and first values of the unfiltered second table.
utils::str(D2)
## 'data.frame': 64 obs. of 18 variables:
## $ Year : int 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 ...
## $ Total.enrolled: int 77149 77149 77232 77066 77214 77772 78426 79043 78519 77288 ...
## $ N.Total : int 4639 4639 4746 4532 4694 4682 4628 4946 4835 4708 ...
## $ N.Public : int 2708 2708 2806 2610 2693 2558 2732 2904 2776 2744 ...
## $ N.Private : int 1932 1932 1941 1922 2001 2124 1896 2042 2059 1964 ...
## $ K.Total : int 4045 4045 4017 4073 4069 4150 4138 4214 4172 4132 ...
## $ K.Public : int 3649 3649 3654 3644 3617 3725 3684 3732 3764 3767 ...
## $ K.Private : int 396 396 364 428 453 425 454 482 408 365 ...
## $ E.Total : int 32715 32715 32604 32826 32622 32873 32683 32872 32663 32238 ...
## $ E.Public : int 30076 30076 29978 30173 29805 30171 29865 29965 29841 29365 ...
## $ E.Private : int 2640 2640 2627 2653 2817 2702 2818 2907 2822 2874 ...
## $ H.Total : int 16602 16602 16668 16535 16654 16601 17047 16613 16574 16445 ...
## $ H.Public : int 15344 15344 15330 15358 15379 15468 15704 15426 15338 15269 ...
## $ H.Private : int 1258 1258 1338 1177 1275 1133 1343 1187 1236 1177 ...
## $ C.Total : int 19149 19149 19196 19101 19175 19467 19930 20397 20275 19764 ...
## $ C.Public : int 15073 15073 14971 15175 15325 15514 15778 16134 16153 15722 ...
## $ C.Private : int 4076 4076 4225 3926 3850 3953 4152 4263 4122 4042 ...
## $ C.Full.time : int 14329 14329 14421 14236 14400 14228 14602 14903 14600 14364 ...
# Compact column overview of the second table after the row filter.
dplyr::glimpse(D2N)
## Observations: 54
## Variables: 18
## $ Year <int> 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011,...
## $ Total.enrolled <int> 77149, 77149, 77232, 77066, 77214, 77772, 78426...
## $ N.Total <int> 4639, 4639, 4746, 4532, 4694, 4682, 4628, 4946,...
## $ N.Public <int> 2708, 2708, 2806, 2610, 2693, 2558, 2732, 2904,...
## $ N.Private <int> 1932, 1932, 1941, 1922, 2001, 2124, 1896, 2042,...
## $ K.Total <int> 4045, 4045, 4017, 4073, 4069, 4150, 4138, 4214,...
## $ K.Public <int> 3649, 3649, 3654, 3644, 3617, 3725, 3684, 3732,...
## $ K.Private <int> 396, 396, 364, 428, 453, 425, 454, 482, 408, 36...
## $ E.Total <int> 32715, 32715, 32604, 32826, 32622, 32873, 32683...
## $ E.Public <int> 30076, 30076, 29978, 30173, 29805, 30171, 29865...
## $ E.Private <int> 2640, 2640, 2627, 2653, 2817, 2702, 2818, 2907,...
## $ H.Total <int> 16602, 16602, 16668, 16535, 16654, 16601, 17047...
## $ H.Public <int> 15344, 15344, 15330, 15358, 15379, 15468, 15704...
## $ H.Private <int> 1258, 1258, 1338, 1177, 1275, 1133, 1343, 1187,...
## $ C.Total <int> 19149, 19149, 19196, 19101, 19175, 19467, 19930...
## $ C.Public <int> 15073, 15073, 14971, 15175, 15325, 15514, 15778...
## $ C.Private <int> 4076, 4076, 4225, 3926, 3850, 3953, 4152, 4263,...
## $ C.Full.time <int> 14329, 14329, 14421, 14236, 14400, 14228, 14602...
## Day, month and year
# Parse the month/day/year strings in `Date`, then derive calendar factors
# from the parsed date.
df <- df %>% mutate(new_date = mdy(Date))
df <- df %>% mutate(
  Day = factor(day(new_date)),
  # Explicit levels 1..12 keep every label attached to the right month even
  # when some month has no rows; without them factor() fails on a length
  # mismatch between the observed levels and the 12 labels.
  Month = factor(month(new_date), levels = 1:12, labels = month.abb),
  Year = factor(year(new_date)),
  # wday() counts from Sunday by default, which would put the "Mon" label on
  # Sundays. week_start = 1 makes 1 mean Monday so the labels line up.
  Wday = factor(
    wday(new_date, week_start = 1),
    levels = 1:7,
    labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
  )
)
# Attach the columns of D2N to each incident row by matching on Year.
total <- merge(df, D2N, by = c("Year"))
## Change level names
# Replace the short school codes with readable names in a single pass.
school_names <- c(
  "MS" = "Middle School",
  "C" = "College",
  "HS" = "High School",
  "ES" = "Elementary School"
)
total$School <- revalue(total$School, school_names)
levels(total$School)
## [1] "College" "Elementary School" "High School"
## [4] "Middle School"
# Map theme: uniform grey canvas, no grid lines, no axis decorations.
map_grey <- "#808080"
no_element <- element_blank()
theme.map <- theme(
  text = element_text(family = "Helvetica Neue", color = "black"),
  panel.background = element_rect(fill = map_grey),
  plot.background = element_rect(fill = map_grey),
  legend.background = element_rect(fill = map_grey),
  panel.grid = no_element,
  plot.title = element_text(size = 15, face = "bold"),
  plot.subtitle = element_text(size = 10),
  legend.key = no_element,
  axis.text = no_element,
  axis.ticks = no_element,
  axis.title = no_element
)
## Plot map
# Polygon outlines for the "state" map.
smap <- map_data("state")
# One row per state holding the largest Fatalities value recorded for that
# state. Keeping a single row per state makes the join below one-to-many
# (each map vertex matches at most one row) instead of repeating every vertex
# once per distinct fatality count and relying on row order for the fill.
# dplyr:: is spelled out because plyr is attached after dplyr in this script
# and its summarise()/count()/arrange() do not honour dplyr groups.
state_shoot <- df %>%
  dplyr::group_by(State) %>%
  dplyr::summarise(
    # A state with no recorded value stays NA rather than becoming -Inf.
    Fatalities = if (all(is.na(Fatalities))) {
      NA_integer_
    } else {
      max(Fatalities, na.rm = TRUE)
    }
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(dplyr::desc(Fatalities))
state_shoot <- as.data.frame(state_shoot)
# map_data() regions are lower-case, so lower-case State before matching.
foo <- inner_join(
  smap,
  state_shoot %>% dplyr::mutate(State = tolower(State)),
  by = c("region" = "State")
)
# Draw the state map with polygons filled by one column of the global `foo`.
#   x: name of a column of `foo`, given as a string; also used as the legend
#      title unless a later labs() overrides it.
# Returns a ggplot object. Reads the globals `smap`, `foo` and `theme.map`.
state_plot <- function(x) {
  # Work on a local copy and expose the requested column under the fixed
  # name `x`, which is what aes(fill = x) below refers to.
  fill_data <- foo
  fill_data$x <- fill_data[, x]

  # NOTE(review): na.rm is not a documented ggplot() argument; kept as-is.
  base_map <- ggplot(
    data = smap,
    mapping = aes(x = long, y = lat, group = group),
    na.rm = TRUE
  )
  state_layer <- geom_polygon(
    data = fill_data,
    aes(fill = x),
    color = "grey",
    size = 0.05
  )
  fill_scale <- scale_fill_gradientn(
    colors = c(low = "#7AC5CD", high = "#68228B"),
    values = scales::rescale(c(5, 10, 15, 20, 40))
  )

  base_map + state_layer + labs(fill = x) + fill_scale + theme.map
}
# Build the map for the Fatalities column, then add the title, subtitle and
# legend title; the resulting plot is printed at top level.
fatality_map <- state_plot("Fatalities")
fatality_map +
  labs(
    title = "US School Shootings 1990 - 2018",
    subtitle = "The States of Virginia, Connecticut, Florida and Colorado recorded highest fatalities",
    fill = str_c("Fatalities")
  )

## Plot 2
# Bar chart of Fatalities by School, with one coloured trace per Month.
# `mode` is a scatter-trace attribute that bar traces do not accept, so it is
# no longer passed.
p <- plot_ly(
  data = total,
  x = ~School,
  y = ~Fatalities,
  type = "bar",
  color = ~Month,
  colors = "Set1"
) %>%
  layout(title = "Fatalities-Level of School(Monthly)")
p
## Plot 3
# Fatalities (x) against Year (y), coloured by Year using the "Set1" palette.
# The former `col = rainbow(10)` argument is dropped: it is not a plot_ly()
# parameter, so it was forwarded to the trace as an unknown attribute.
# No trace type is given here; plotly picks one from the data.
p2 <- plot_ly(
  data = total,
  x = ~Fatalities,
  y = ~Year,
  color = ~Year,
  colors = "Set1"
) %>%
  layout(title = "Yearly Fatalities")
p2
## Plot 4
# Fatalities by weekday. Built step by step: base plot, one named trace,
# then titles for the plot and both axes.
p3 <- plot_ly(data = total, x = ~Wday, y = ~Fatalities)
p3 <- add_trace(p3, y = ~Fatalities, name = "Days")
p3 <- layout(
  p3,
  title = "Fatalities(Daily)",
  yaxis = list(title = "Number of Fatalities"),
  xaxis = list(title = "Days on which the Fatalities took place")
)
p3