---
title: "ANLY 512 Lab 2"
subtitle: Financial Dashboard
author: "Mengyuan Huang, Ziyan Zhang"
date: "`r Sys.Date()`"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    social: menu
    source_code: embed
---
```{r}
# get required packages
library(ggplot2)
library(plotly)
library(dplyr)
library(flexdashboard)
# Load the yearly storm CSV extracts and stack them into a single table.
# Data source: https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/
# The 2019 file only covers events through September 2019.
storm_data <- do.call(
  rbind,
  lapply(
    c(
      "StormEvents_2015.csv",
      "StormEvents_2016.csv",
      "StormEvents_2017.csv",
      "StormEvents_2018.csv",
      "StormEvents_2019.csv"
    ),
    read.csv
  )
)
# Split the YEARMONTH stamp (characters 1-4 = year, 5-6 = month) into integer
# YEAR / MONTH columns and keep the ID columns as text labels.
storm_data <- storm_data %>%
  mutate(
    YEARMONTH = as.character(YEARMONTH),
    YEAR = as.integer(substr(YEARMONTH, 1, 4)),
    MONTH = as.integer(substr(YEARMONTH, 5, 6)),
    EPISODE_ID = as.character(EPISODE_ID),
    EVENT_ID = as.character(EVENT_ID)
  )
# Derived tables for the dashboard panels start here.
# Visualization 1 (value box text): number of distinct storm events with
# YEAR == 2019; the 2019 file only runs through September 2019.
v1_data <- filter(storm_data, YEAR == 2019) %>%
  group_by(YEAR) %>%
  summarise(n_events_2019 = n_distinct(EVENT_ID))
# Visualization 2 - number of distinct storm events per month, per year
## line chart, x - month, y - n_events, color by year
v2_data <- storm_data %>%
  group_by(YEAR, MONTH) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  # summarise() only removes the last grouping level (MONTH); drop the
  # remaining YEAR grouping so downstream code gets a plain, ungrouped table.
  ungroup()
# Visualization 3 - locations with the most distinct storm events in 2019
## bar chart, x - location, y - n_events, order by n_events desc
v3_data <- storm_data %>%
  filter(YEAR == 2019) %>%
  group_by(LOCATION) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  arrange(desc(n_events)) %>%
  # Rank explicitly by n_events instead of relying on top_n() silently
  # picking the last column. top_n() keeps ties, so more than 30 rows can be
  # returned when several locations share the 30th-largest count.
  top_n(30, n_events)
# Visualization 4 - storm range by geographic position for September 2019
## scatter plot, x - Longitude, y - Latitude
# Keeps only rows with LOCATION_INDEX == 1 and YEARMONTH "201909", and only
# the three columns the plot needs.
v4_data <- storm_data %>%
  filter(LOCATION_INDEX == 1, YEARMONTH == "201909") %>%
  select(RANGE, LATITUDE, LONGITUDE)
# Visualization 5 - per-location summary table over the whole data set
## Table - one row per LOCATION: min, mean (rounded to 2 decimals), median and
## max of RANGE plus the distinct event count, sorted by median range
## (largest first). Columns are then relabelled, in order, for display.
v5_data <- storm_data %>%
  group_by(LOCATION) %>%
  summarise(
    min_range = min(RANGE),
    mean_range = round(mean(RANGE), 2),
    median_range = median(RANGE),
    max_range = max(RANGE),
    n_events = n_distinct(EVENT_ID)
  ) %>%
  arrange(desc(median_range)) %>%
  setNames(c(
    "Location",
    "Min. Storm Range",
    "Mean Storm Range",
    "Median Storm Range",
    "Max. Storm Range",
    "Storm Event Count"
  ))
# Visualization 6 - year-over-year count of "huge" storm events for the
# locations that have the most huge storm events overall.
# A storm row is "huge" when its RANGE exceeds Q3 + 1.5 * IQR of RANGE
# computed over the whole data set.
range_q1 <- quantile(storm_data$RANGE, 0.25)
range_q3 <- quantile(storm_data$RANGE, 0.75)
# Compute the cutoff once; unname() drops the "75%" label quantile() attaches.
huge_range_cutoff <- unname(range_q3 + 1.5 * (range_q3 - range_q1))

# All rows above the cutoff; reused for both the ranking and the yearly counts
# so the full table is filtered only once and never joined back onto itself.
huge_storms <- storm_data %>%
  filter(RANGE > huge_range_cutoff)

# Top locations by number of distinct huge events. The ranking column is named
# explicitly; top_n() keeps ties, so more than 5 locations can be returned.
huge_top_locations <- huge_storms %>%
  group_by(LOCATION) %>%
  summarise(all_events = n_distinct(EVENT_ID)) %>%
  top_n(5, all_events)

# Distinct huge events per top location and year.
v6_data <- huge_storms %>%
  filter(LOCATION %in% huge_top_locations$LOCATION) %>%
  group_by(LOCATION, YEAR) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  # Drop the LOCATION grouping that summarise() leaves behind.
  ungroup()
# Visualization 7 - Storm Range by Location
## Boxplot, x - top location, y - storm range
# NOTE: the boxplot that consumes v7_data is currently commented out in the
# final chunk of this file; the data is still prepared here.
# v7_temp: locations ranked by distinct event count over all years. The
# ranking column is passed to top_n() explicitly rather than relying on it
# defaulting to the last column; ties are kept, so more than 30 rows can
# be returned.
v7_temp <- storm_data %>%
  group_by(LOCATION) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  arrange(desc(n_events)) %>%
  top_n(30, n_events)
# v7_data: every storm row belonging to those locations, carrying the
# location's n_events and ordered by it (largest first).
v7_data <- v7_temp %>%
  inner_join(storm_data, by = "LOCATION") %>%
  arrange(desc(n_events))
```
Column {data-width=250}
-----------------------------------------------------------------------
### Total # Storms in 2019 (thru Sept 2019)
```{r}
# Value box showing the 2019 distinct storm event count held in v1_data.
valueBox(v1_data[["n_events_2019"]], icon = "fa-area-chart")
```
### Top 30 Locations with Most Storm Events in 2019
```{r}
# Horizontal bar chart of storm event counts for the locations in v3_data.
# v3_data is sorted by descending count, so reversing LOCATION for the factor
# levels puts the largest bar at the top once the axes are flipped.
ggplotly(
  ggplot(
    data = v3_data,
    # `levels` spelled out in full (the original relied on partial matching
    # of `level`), and the column is referenced directly instead of through
    # `v3_data$` inside aes().
    aes(x = factor(LOCATION, levels = rev(LOCATION)), y = n_events)
  ) +
    geom_bar(
      stat = "identity",
      color = "indianred4",
      fill = "indianred3",
      width = 0.8
    ) +
    xlab("Location") +
    theme(axis.text.x = element_text(angle = 60)) +
    ylab("Storm Event Count") +
    # A plot has exactly one coordinate system. The earlier
    # coord_cartesian(ylim = c(20, 60)) was replaced by this coord_flip() and
    # never took effect, so it has been removed; the rendered chart is the
    # same. If a zoomed count axis is wanted, use coord_flip(ylim = ...).
    coord_flip()
)
```
Column {data-width=350}
-----------------------------------------------------------------------
### YoY Huge Storm Event Count by Huge Storm Top Locations
```{r}
# Line chart of huge-storm event counts per year, one coloured line per
# location in v6_data, converted to an interactive plotly widget.
(
  ggplot(
    data = v6_data,
    aes(x = factor(YEAR), y = n_events, group = factor(LOCATION))
  ) +
    geom_line(aes(color = factor(LOCATION))) +
    xlab("YEAR") +
    ylab("Huge Storm Event Count \n Huge Storm is defined by storm range larger than 1.5 IQR above 3rd quantile") +
    guides(color = guide_legend(title = "Huge Storm Top Locations"))
) %>%
  ggplotly()
```
### Storm Event Count by Month and Year
```{r}
# Line chart of distinct storm event counts by month, one coloured line per
# year in v2_data, converted to an interactive plotly widget.
ggplotly(
  ggplot(
    data = v2_data,
    aes(x = factor(MONTH), y = n_events, group = factor(YEAR))
  ) +
    geom_line(aes(color = factor(YEAR))) +
    xlab("Month") +
    # Axis label typo fixed: was "Storm Evnet Count".
    ylab("Storm Event Count") +
    guides(color = guide_legend(title = "Year"))
)
```
Column {data-width=500}
-----------------------------------------------------------------------
### Storm Geolocation for storms in Sept 2019
```{r}
# Interactive scatter of the v4_data points by longitude / latitude.
# Marker size and colour are both mapped to RANGE.
v4_data %>%
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~LONGITUDE,
    y = ~LATITUDE,
    size = ~RANGE,
    color = ~RANGE,
    colors = rainbow(6),
    alpha = 0.8,
    name = "Storm Range"
  )
```
### 2015 - 2019 Storm Summary by Location
```{r}
# Interactive table of the per-location summary in v5_data.
DT::datatable(data = v5_data)
# Disabled alternative panel: boxplot of storm range for the top locations.
# NOTE(review): it references `v6_temp`, which is not defined in this file;
# the matching object built during data preparation is `v7_temp` - fix the
# name before re-enabling.
# ggplotly(
#   ggplot(v7_data, aes(x = factor(LOCATION, levels = v6_temp$LOCATION), y = RANGE)) +
#     geom_boxplot(aes(fill = n_events)) +
#     theme(axis.text.x = element_text(angle = 60)) +
#     scale_fill_gradient(low = "green", high = "orange") +
#     xlab("Location") +
#     ylab("Storm Range") +
#     guides(color = guide_legend(title="Storm Events Count"))
# )
```