ANLY 512 Lab 2

---
title: "ANLY 512 Lab 2"
subtitle: Financial Dashboard
author: "Mengyuan Huang, Ziyan Zhang"
date: "`r Sys.Date()`"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    social: menu
    source_code: embed
---

```{r}
# get required packages
library(ggplot2)
library(plotly)
library(dplyr)
library(flexdashboard)

# Load the yearly StormEvents CSV files and stack them into a single table.
# Data source: https://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/
storm_data <- do.call(
  rbind,
  lapply(
    c(
      "StormEvents_2015.csv",
      "StormEvents_2016.csv",
      "StormEvents_2017.csv",
      "StormEvents_2018.csv",
      "StormEvents_2019.csv" # through Sept 2019
    ),
    read.csv
  )
)

# Derive calendar fields from YEARMONTH (characters 1-4 become YEAR,
# characters 5-6 become MONTH) and store both identifiers as text.
storm_data <- storm_data %>%
  mutate(
    YEARMONTH = as.character(YEARMONTH),
    YEAR = as.integer(substr(YEARMONTH, 1, 4)),
    MONTH = as.integer(substr(YEARMONTH, 5, 6)),
    EPISODE_ID = as.character(EPISODE_ID),
    EVENT_ID = as.character(EVENT_ID)
  )

# Build one data set per visualization ----
# Visualization 1 (value box): number of distinct storm events in 2019.
# The 2019 file only runs through September 2019.
v1_data <- storm_data %>%
  filter(YEAR %in% 2019L) %>%
  group_by(YEAR) %>%
  summarise(n_events_2019 = n_distinct(EVENT_ID))


# Visualization 2 - number of distinct storm events by month and year
## line chart, x - month, y - n_events, color by year
v2_data <- storm_data %>%
  group_by(YEAR, MONTH) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  # summarise() removes only the last grouping level, so the result would
  # otherwise stay grouped by YEAR; drop the leftover grouping explicitly.
  ungroup()


# Visualization 3 - top locations with the most distinct storm events in 2019
## bar chart, x - location, y - n_events, order by n_events desc
v3_data <- storm_data %>%
  filter(YEAR == 2019) %>%
  group_by(LOCATION) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  arrange(desc(n_events)) %>%
  # Rank on n_events explicitly instead of relying on top_n() defaulting to
  # the last column. top_n() keeps ties, so more than 30 rows can come back.
  top_n(30, n_events)


# Visualization 4 - storm range and position for September 2019
## scatter plot, x - Longitude, y - Latitude
## only rows whose LOCATION_INDEX equals 1 are kept
v4_data <- storm_data %>%
  filter(YEARMONTH == "201909", LOCATION_INDEX == 1) %>%
  select(RANGE, LATITUDE, LONGITUDE)

# Visualization 5 - storm range summary by location
## Table - Location, min / mean / median / max storm range, event count
## Columns are created with their display names; rows are ordered by
## descending median range.
v5_data <- storm_data %>%
  group_by(LOCATION) %>%
  summarise(
    `Min. Storm Range` = min(RANGE),
    `Mean Storm Range` = round(mean(RANGE), 2),
    `Median Storm Range` = median(RANGE),
    `Max. Storm Range` = max(RANGE),
    `Storm Event Count` = n_distinct(EVENT_ID)
  ) %>%
  arrange(desc(`Median Storm Range`)) %>%
  rename(Location = LOCATION)

# Visualization 6 - YoY huge storm event count for the top huge-storm locations
## A "huge" storm is a row whose RANGE lies above Q3 + 1.5 * IQR, with the
## quartiles taken over RANGE in the whole data set.
# na.rm = TRUE keeps quantile() from erroring if RANGE has missing values.
range_q1 <- quantile(storm_data$RANGE, 0.25, na.rm = TRUE)
range_q3 <- quantile(storm_data$RANGE, 0.75, na.rm = TRUE)
# Computed once so every use of the cutoff stays in agreement.
huge_range_cutoff <- range_q3 + 1.5 * (range_q3 - range_q1)

# Huge-storm rows only; filter() also drops rows with a missing RANGE.
v6_huge <- storm_data %>%
  filter(RANGE > huge_range_cutoff)

# Locations with the most distinct huge storm events. top_n() ranks on
# all_events explicitly and keeps ties, so more than 5 rows can come back.
v6_top <- v6_huge %>%
  group_by(LOCATION) %>%
  summarise(all_events = n_distinct(EVENT_ID)) %>%
  top_n(5, all_events)

# Yearly huge-storm event counts, restricted to those top locations.
v6_data <- v6_huge %>%
  semi_join(v6_top, by = "LOCATION") %>%
  group_by(LOCATION, YEAR) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  ungroup()

# Visualization 7 - Storm Range by Location
## Boxplot, x - top location, y - storm range
## v7_temp: locations with the most distinct storm events over all years.
v7_temp <- storm_data %>%
  group_by(LOCATION) %>%
  summarise(n_events = n_distinct(EVENT_ID)) %>%
  arrange(desc(n_events)) %>%
  # Rank on n_events explicitly instead of relying on top_n() defaulting to
  # the last column. top_n() keeps ties, so more than 30 rows can come back.
  top_n(30, n_events)
## v7_data: every storm_data row for those locations, busiest location first.
v7_data <- v7_temp %>%
  inner_join(storm_data, by = "LOCATION") %>%
  arrange(desc(n_events))
```


Column {data-width=250}
-----------------------------------------------------------------------

### Total # Storms in 2019 (thru Sept 2019)

```{r}
# Headline figure: the distinct 2019 storm event count held in v1_data.
valueBox(v1_data[["n_events_2019"]], icon = "fa-area-chart")
```

### Top 30 Locations with Most Storm Events in 2019

```{r}
# Horizontal bar chart of the locations in v3_data. v3_data is sorted by
# descending n_events, so reversing its LOCATION order for the factor levels
# places the busiest location at the top once the axes are flipped.
ggplotly(
  ggplot(
    data = v3_data,
    aes(x = factor(LOCATION, levels = rev(v3_data$LOCATION)), y = n_events)
  ) +
    geom_bar(
      stat = "identity",
      color = "indianred4", fill = "indianred3", width = 0.8
    ) +
    xlab("Location") +
    theme(axis.text.x = element_text(angle = 60)) +
    ylab("Storm Event Count") +
    # No coord_cartesian(ylim = ...) here: a plot has a single coordinate
    # system, so one added before coord_flip() is replaced and has no effect.
    # Pass limits to coord_flip(ylim = ...) if zooming is ever wanted.
    coord_flip()
)
```


Column {data-width=350}
-----------------------------------------------------------------------

### YoY Huge Storm Event Count by Huge Storm Top Locations
```{r}
# Line chart of v6_data: huge storm events per year, one line per location.
ggplotly(
  ggplot(
    data = v6_data,
    mapping = aes(x = factor(YEAR), y = n_events, group = factor(LOCATION))
  ) +
    geom_line(mapping = aes(color = factor(LOCATION))) +
    labs(
      x = "YEAR",
      y = "Huge Storm Event Count \n Huge Storm is defined by storm range larger than 1.5 IQR above 3rd quantile"
    ) +
    guides(color = guide_legend(title = "Huge Storm Top Locations"))
)
```

### Storm Event Count by Month and Year

```{r}
# Line chart of v2_data: distinct storm events per month, one line per year.
ggplotly(
  ggplot(
    data = v2_data,
    aes(x = factor(MONTH), y = n_events, group = factor(YEAR))
  ) +
    geom_line(aes(color = factor(YEAR))) +
    xlab("Month") +
    # Axis label typo fixed ("Evnet" -> "Event").
    ylab("Storm Event Count") +
    guides(color = guide_legend(title = "Year"))
)
```


Column {data-width=500}
-----------------------------------------------------------------------

### Storm Geolocation for storms in Sept 2019

```{r}
# Scatter of the v4_data rows by longitude / latitude; marker size and
# colour both follow RANGE.
v4_data %>%
  plot_ly(
    x = ~LONGITUDE, y = ~LATITUDE,
    type = "scatter", mode = "markers",
    color = ~RANGE, colors = rainbow(6),
    size = ~RANGE, alpha = 0.8,
    name = "Storm Range"
  )
```

### 2015 - 2019 Storm Summary by Location

```{r}
# Interactive table of the per-location summary held in v5_data.
DT::datatable(v5_data)

# Disabled alternative: boxplot of storm range per location built from v7_data.
# NOTE(review): this draft originally took its factor levels from `v6_temp`,
# which is not defined anywhere in the visible code; `v7_temp` (the table
# v7_data is joined from) is presumably what was meant, so the reference
# below uses it. Confirm before re-enabling.
#ggplotly(
#  ggplot(v7_data, aes(x = factor(LOCATION, levels = v7_temp$LOCATION), y = RANGE)) +
#    geom_boxplot(aes(fill = n_events)) +
#    theme(axis.text.x = element_text(angle = 60)) +
#    scale_fill_gradient(low = "green", high = "orange") +
#    xlab("Location") +
#    ylab("Storm Range") +
#    guides(color = guide_legend(title="Storm Events Count"))
#)
```