Accident Category Human and Economic

Column

What is the human and economic cost per platform unit type and Accident Category?

Column

What is the human and economic cost per platform Main Event and Accident Category?

What is the human and economic cost per platform function and Accident Category?

Decision Tree

Column

Formula: HumanCost ~ WindSpeed_m_s + WaterDepth_m + DrillDepth_km, method = “poisson”, data = with outliers

Column

Formula: formula = HumanCost ~ WindSpeed_m_s, data = no outliers

Formula: HumanCost ~ WindSpeed_m_s + WaterDepth_m + DrillDepth_km + as.factor(AccidentCategory), data = with outliers

Damage Human and Economic Cost

Column

Which major operation causes the most cost in accidents in percentiles?

Column

Which Function causes the most cost in accidents in percentiles?

Which main events cause the most cost in accidents in percentiles?

Heatmap

Column

Heatmap by Main Operation and Damage Cost in million

Column

Heatmap by Main Events and Damage Cost in million

---
title: "WOAD 45 Years Data analysis"
author: "Viviane Schneider"
output:  
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    social: menu
    source_code: embed
---

```{r setup,  include=FALSE, cache=TRUE}




library(rattle)
library(caret)
library(rpart)
library(rpart.plot)
library(corrplot)
library(randomForest)
library(RColorBrewer)
library(dplyr)
library(plotly)
library(ggplot2)
library(tidyr)
library(magrittr)
library(plotrix)
library(rgl)
library(lubridate)
library(ggplot2)
library(GGally)
library(corrplot)
library(corrgram)
library(ppcor)
library(readr)
library(ggvis)
library(gganimate)
library(gifski)
library(av)
library(magick)
library(viridis)
library(hrbrthemes)
library(caret)


```





```{r}
# Read the cleaned WOAD accident table from a semicolon-delimited file.
# NOTE(review): the hard-coded working directory ties this chunk to one
# machine's folder layout -- confirm before running elsewhere.
setwd("~/Regression Models")
library(readr)
Cleaning_completo_woad <- read_delim(
  file = "Cleaning_completo_woad.CSV",
  delim = ";",
  escape_double = FALSE,
  # Cost, depth, wind and casualty columns are parsed as numbers; the
  # accident date is parsed as month/day/year.
  col_types = cols(
    DamageCost_million = col_number(),
    DrillDepth_km = col_number(),
    WaterDepth_m = col_number(),
    WindSpeed_m_s = col_number(),
    FatalitiesCrew = col_number(),
    Fatalities3rd_Party = col_number(),
    InjuriesCrew = col_number(),
    Injuries3rd_Party = col_number(),
    Accident_Date = col_date(format = "%m/%d/%Y")
  ),
  trim_ws = TRUE
)

# Histogram panel for pairs(): draws the histogram of `x` inside the current
# panel, with bar heights scaled so the tallest bar has height 1.
#
# x   : numeric vector for the panel.
# ... : further arguments forwarded to rect().
#
# The panel's user coordinates are changed temporarily (y range 0 to 1.5) and
# restored when the function exits.
panel.hist <- function(x, ...)
{
  usr <- par("usr")
  # par() needs the tag `usr =`; a bare numeric vector is rejected with a
  # warning and the coordinates would not be restored.
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  y <- h$counts / max(h$counts)
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}

# 1.2 by Melina de Souza Leite 
# Scatter panel for pairs(): plots the points and, when at least one (x, y)
# pair is finite, overlays the least-squares line fitted to the finite pairs.
#
# col, bg, pch, cex : passed to points().
# col.line          : colour of the fitted line.
panel.lm <- function(x, y, col = par("col"), bg = NA, pch = par("pch"),
                     cex = 1, col.line = "red") {
  points(x, y, pch = pch, col = col, bg = bg, cex = cex)
  finite <- is.finite(x) & is.finite(y)
  if (!any(finite)) {
    return(invisible(NULL))
  }
  y_fin <- y[finite]
  x_fin <- x[finite]
  abline(lm(y_fin ~ x_fin), col = col.line)
}

# 1.3 help(pairs) by Melina de Souza Leite 
# Correlation panel for pairs(): writes |cor(x, y)| in the middle of the
# panel, with text size proportional to the absolute correlation.
#
# digits  : significant digits shown.
# prefix  : string pasted in front of the value.
# cex.cor : base text size; when missing it is derived from the text width.
#
# The panel's user coordinates are set to the unit square while drawing and
# restored when the function exits.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  usr <- par("usr")
  # par() needs the tag `usr =`; a bare numeric vector is rejected with a
  # warning and the coordinates would not be restored.
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r)
}


# Tabulate a vector: one row per distinct value (`x`) with its count (`Freq`)
# and its share of all counted values in percent (`percentual`), ordered from
# the most to the least frequent value.
.Unianalysis <- function(x) {
  table(x) %>%
    as.table() %>%
    as.data.frame() %>%
    mutate(percentual = prop.table(Freq) * 100) %>%
    arrange(desc(Freq))
}

# Horizontal bar chart of a frequency table: expects columns `x` (category)
# and `Freq` (bar length), i.e. the layout produced by .Unianalysis().
.BarPlot <- function(z) {
  bars <- geom_bar(stat = "identity", fill = "#f68060", alpha = .6, width = .4)
  ggplot(z, aes(x = x, y = Freq)) +
    bars +
    coord_flip() +
    xlab("") +
    theme_bw()
}

```



```{r}

## Tidying data

# Per-accident modelling table: adds a weighted casualty score (each fatality
# weighs 10, each injury weighs 3) plus the calendar year and month of the
# accident date.
trainVariable <- mutate(
  Cleaning_completo_woad,
  HumanCost = (FatalitiesCrew * 10) + (Fatalities3rd_Party * 10) +
    (InjuriesCrew * 3) + (Injuries3rd_Party * 3),
  Year = year(Accident_Date),
  Month = month(Accident_Date)
)

# Accident summary per combination of event, platform function, unit type,
# damage degree, spill type, accident category, year and month.
#
# Missing values are skipped inside each aggregate (na.rm = TRUE). Without
# that, one missing record turns the whole group total into NA, which the
# later NA -> 0 replacement would then report as zero.
#
# Wind speed is averaged, like the two depth columns and like the other
# summaries in this chunk; a sum of wind speeds over accidents is not a
# meaningful quantity.
by_Event_Function <- trainVariable %>%
  group_by(MainEvent, Function, TypeofUnit, Damage, SpillType,
           AccidentCategory, Year, Month) %>%
  summarise(
    How_Many = n(),
    Owner = n_distinct(Owner),
    FatalitiesCrew = sum(FatalitiesCrew, na.rm = TRUE),
    Fatalities3rd_Party = sum(Fatalities3rd_Party, na.rm = TRUE),
    InjuriesCrew = sum(InjuriesCrew, na.rm = TRUE),
    Injuries3rd_Party = sum(Injuries3rd_Party, na.rm = TRUE),
    DamageCost_million = sum(DamageCost_million, na.rm = TRUE),
    DrillDepth_km = mean(DrillDepth_km, na.rm = TRUE),
    WaterDepth_m = mean(WaterDepth_m, na.rm = TRUE),
    WindSpeed_m_s = mean(WindSpeed_m_s, na.rm = TRUE),
    HumanCost = sum(HumanCost, na.rm = TRUE)
  ) %>%
  # NOTE(review): despite its name this is the unweighted number of casualties
  # per accident, not HumanCost / How_Many -- confirm which one is intended.
  mutate(
    Mean_HumanCost = (FatalitiesCrew + Fatalities3rd_Party +
      InjuriesCrew + Injuries3rd_Party) / How_Many
  ) %>%
  arrange(desc(How_Many))

# Export the summary and the per-accident table (semicolon-separated,
# decimal comma).
write.csv2(by_Event_Function, file = "by_Event_Function.CSV")
write.csv2(trainVariable, file = "trainVariable.CSV")



# Damage overview table taken from the event/function summary, keeping only
# complete rows.
#
# Built as a data frame rather than with cbind(): binding the character
# `Damage` column to numeric columns yields a character matrix, which turned
# every count and cost into text.
# NOTE(review): the column labels "Cccurrences" and "Cost in Milion" are
# misspelled; kept as-is in case anything refers to them by name.
damage <- data.frame(
  "Damage Degree" = by_Event_Function$Damage,
  "Cccurrences" = by_Event_Function$How_Many,
  "Total Human Cost" = by_Event_Function$HumanCost,
  "Mean Human Cost" = by_Event_Function$Mean_HumanCost,
  "Cost in Milion" = by_Event_Function$DamageCost_million,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

damage <- na.omit(damage)


# Frequency table (.Unianalysis) for every column of the raw data.
# lapply() walks the columns with their own types; apply() would first turn
# the whole data frame into a character matrix, so numeric values would be
# tabulated as formatted strings.
OverView <- lapply(Cleaning_completo_woad, .Unianalysis)

# De-duplicated copy of each frequency table, kept as a named list of data
# frames (sapply() would collapse the equally-long results into a list-matrix).
OverView2 <- lapply(OverView, distinct)



# Yearly totals: accident count, distinct owners, casualties, damage cost,
# weighted human cost and mean wind speed.
#
# Missing values are skipped inside each aggregate (na.rm = TRUE). Without
# that, one missing record turns the whole yearly total into NA, which the
# later NA -> 0 replacement would then report as zero.
by_Year <- trainVariable %>%
  group_by(Year) %>%
  summarise(
    Freq = n(),
    Owner = n_distinct(Owner),
    FatalitiesCrew = sum(FatalitiesCrew, na.rm = TRUE),
    Fatalities3rd_Party = sum(Fatalities3rd_Party, na.rm = TRUE),
    InjuriesCrew = sum(InjuriesCrew, na.rm = TRUE),
    Injuries3rd_Party = sum(Injuries3rd_Party, na.rm = TRUE),
    DamageCost_million = sum(DamageCost_million, na.rm = TRUE),
    HumanCost = sum(HumanCost, na.rm = TRUE),
    WindSpeed_m_s = mean(WindSpeed_m_s, na.rm = TRUE)
  )
  
  
# Accidents with a positive human cost, summarised per human-cost value,
# accident category and damage degree; only combinations that occur more
# than 20 times are kept.
by_HumanCost <- trainVariable %>%
  filter(HumanCost > 0) %>%
  group_by(HumanCost, AccidentCategory, Damage) %>%
  summarise(
    Freq = n(),
    Owner = n_distinct(Owner),
    FatalitiesCrew = sum(FatalitiesCrew),
    Fatalities3rd_Party = sum(Fatalities3rd_Party),
    InjuriesCrew = sum(InjuriesCrew),
    Injuries3rd_Party = sum(Injuries3rd_Party),
    DamageCost_million = sum(DamageCost_million),
    WindSpeed_m_s = mean(WindSpeed_m_s)
  ) %>%
  filter(Freq > 20)
           

# Replace every remaining NA in the two summaries with 0.
# NOTE(review): this also turns "unknown" totals into zeros -- confirm that is
# the intended reading of a missing value.
by_Year <- mutate_all(by_Year, .funs = replace_na, replace = 0)

by_Event_Function <- mutate_all(
  by_Event_Function,
  .funs = replace_na,
  replace = 0
)



```

Accident Category Human and Economic 
=======================================================================

Column {data-width=400}
-----------------------------------------------------------------------

###  What is the human and economic cost per platform unit type and Accident Category? 


```{r}


# Chart for the document: human cost (log x axis) against platform unit type.
# Marker size, animation frame and hover text all come from
# DamageCost_million; marker colour comes from the accident category.

library(plotly)
library(gapminder)
p <- plot_ly(
  data = trainVariable,
  x = ~HumanCost,
  y = ~TypeofUnit,
  size = ~DamageCost_million,
  color = ~as.factor(AccidentCategory),
  frame = ~DamageCost_million,
  text = ~DamageCost_million,
  hoverinfo = "text",
  type = "scatter",
  mode = "markers"
)
p <- layout(p, xaxis = list(type = "log"))
p




```




Column {data-width=300}
-----------------------------------------------------------------------


### What is the human and economic cost per platform Main Event and Accident Category?

```{r}

# Human cost (log x axis) against main event. Marker size, animation frame
# and hover text all come from DamageCost_million; marker colour comes from
# the accident category.
p <- plot_ly(
  data = trainVariable,
  x = ~HumanCost,
  y = ~MainEvent,
  size = ~DamageCost_million,
  color = ~as.factor(AccidentCategory),
  frame = ~DamageCost_million,
  text = ~DamageCost_million,
  hoverinfo = "text",
  type = "scatter",
  mode = "markers"
)
p <- layout(p, xaxis = list(type = "log"))
p

```



### What is the human and economic cost per platform function and Accident Category?

```{r}

# Human cost (log x axis) against platform function. Marker size, animation
# frame and hover text all come from DamageCost_million; marker colour comes
# from the accident category.
p <- plot_ly(
  data = trainVariable,
  x = ~HumanCost,
  y = ~Function,
  size = ~DamageCost_million,
  color = ~as.factor(AccidentCategory),
  frame = ~DamageCost_million,
  text = ~DamageCost_million,
  hoverinfo = "text",
  type = "scatter",
  mode = "markers"
)
p <- layout(p, xaxis = list(type = "log"))
p

```


Decision Tree
=======================================================================

Column {data-width=600}
-----------------------------------------------------------------------

### Formula: HumanCost ~ WindSpeed_m_s + WaterDepth_m + DrillDepth_km, method = "poisson", data = with outliers

```{r}
library(rpart)
library(rpart.plot)

# Poisson regression tree of human cost on wind speed, water depth and drill
# depth, fitted on all accidents (outliers included), then plotted.
arvore <- rpart(
  formula = HumanCost ~ WindSpeed_m_s + WaterDepth_m + DrillDepth_km,
  data = trainVariable,
  method = "poisson"
)

rpart.plot(arvore)


```




Column {data-width=300}
-----------------------------------------------------------------------


### Formula: formula = HumanCost ~ WindSpeed_m_s, data = no outliers

```{r}
# Human-cost values that boxplot() flags as outliers.
outliers <- boxplot(trainVariable$HumanCost, plot = FALSE)$out

# Drop the accidents whose human cost is one of those outliers. The filter
# has to be negated: `%in% outliers` on its own keeps only the outliers,
# which contradicts both the variable name and the panel heading.
no_outliers <- trainVariable[!(trainVariable$HumanCost %in% outliers), ]

# Linear model without intercept; not used further in this chunk.
ver <- lm(
  formula = HumanCost ~ WindSpeed_m_s + as.factor(MainEvent) - 1,
  data = no_outliers
)

# Regression tree of human cost on wind speed for the outlier-free data.
arvore <- rpart(HumanCost ~ WindSpeed_m_s, data = no_outliers)
rpart.plot(arvore)

```

### Formula: HumanCost ~ WindSpeed_m_s + WaterDepth_m + DrillDepth_km + as.factor(AccidentCategory), data = with outliers


```{r}
# Regression tree of human cost on wind speed, water depth, drill depth and
# accident category (as a factor), fitted on all accidents, then plotted.
arvore_V_E <- rpart(
  formula = HumanCost ~ WindSpeed_m_s + WaterDepth_m + DrillDepth_km +
    as.factor(AccidentCategory),
  data = trainVariable
)
rpart.plot(arvore_V_E)

```




Damage Human and Economic Cost 
=======================================================================

Column {data-width=400}
-----------------------------------------------------------------------

###  Which major operation causes the most cost in accidents in percentiles? 

```{r}
# Stacked bars per main operation, filled by damage degree; position = "fill"
# rescales every bar to the same total length.
# NOTE(review): the axis label "Percentil Cost" looks misspelled; left as-is.
ggplot(
  trainVariable,
  aes(x = DamageCost_million, y = MainOperation, fill = Damage)
) +
  geom_bar(stat = "identity", position = "fill") +
  labs(
    title = "Main Operation per cost in millions and type of damage",
    x = "Percentil Cost",
    y = "Main Operation"
  )


```

Column {data-width=400}
-----------------------------------------------------------------------

###  Which Function causes the most cost in accidents in percentiles? 

```{r}

# Stacked bars per platform function, filled by damage degree;
# position = "fill" rescales every bar to the same total length.
# NOTE(review): "Funtions" and "Percentil" look misspelled; left as-is.
ggplot(
  trainVariable,
  aes(x = DamageCost_million, y = Function, fill = Damage)
) +
  geom_bar(stat = "identity", position = "fill") +
  labs(
    title = "Funtions per Damage cost in millions",
    x = "Percentil",
    y = "Platform function"
  )
```

###  Which main events cause the most cost in accidents in percentiles? 

```{r}

# Stacked bars per main event, filled by damage degree; position = "fill"
# rescales every bar to the same total length.
# NOTE(review): the axis label "Percentil" looks misspelled; left as-is.
ggplot(
  trainVariable,
  aes(x = DamageCost_million, y = MainEvent, fill = Damage)
) +
  geom_bar(stat = "identity", position = "fill") +
  labs(
    title = "Main Events per cost in million",
    x = "Percentil",
    y = "Main Events"
  )
```

Heatmap
=======================================================================


Column {data-width=600}
-----------------------------------------------------------------------

###  Heatmap by Main Operation and Damage Cost in million

```{r}
library("lattice")

# Accidents with a damage cost above 40 (DamageCost_million), cross-tabulated
# as main operation (rows) by damage-cost value (columns).
data <- filter(trainVariable, DamageCost_million > 40)
data <- table(data$MainOperation, data$DamageCost_million)

# Reverse the row order, then transpose so damage cost runs along the x axis.
row_order <- c(nrow(data):1)
levelplot(
  t(data[row_order, ]),
  col.regions = heat.colors(100),
  pretty = TRUE,
  ylab = "Main Operation",
  xlab = "Damage Cost per million"
)

```

Column {data-width=600}
-----------------------------------------------------------------------

###  Heatmap by Main Events and Damage Cost in million


```{r}
library("lattice")

# Accidents with a damage cost above 40 (DamageCost_million), cross-tabulated
# as main event (rows) by damage-cost value (columns).
data <- trainVariable %>%
  filter(DamageCost_million > 40)
data <- table(data$MainEvent, data$DamageCost_million)

# Reverse the row order, then transpose so damage cost runs along the x axis.
# The y axis shows main events, so it is labelled accordingly (it previously
# carried the "Main Operation" label of the neighbouring heatmap).
levelplot(
  t(data[c(nrow(data):1), ]),
  col.regions = heat.colors(100),
  pretty = TRUE,
  ylab = "Main Events",
  xlab = "Damage Cost per million"
)

```