library(tidyverse)
library(tidyr) # Using functions for reshaping data
library(dplyr) # Using functions for EDA
library(ggplot2) # Visualization functions
Wide to long transformation
bomber_wide <- as_tibble(readRDS("bomber_data/bomber_wide.rds"))
wide_to_long <- bomber_wide %>%
gather(`1996`:`2014`,key="year",value="FH")
head(wide_to_long)
## # A tibble: 6 × 4
## Type MD year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
Long to wide transformation
bomber_long <- as_tibble(readRDS("bomber_data/bomber_long.rds"))
wide_to_long <- bomber_long %>%
spread(value="Value",key="Output")
head(wide_to_long)
## # A tibble: 6 × 6
## Type MD FY Cost FH Gallons
## <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
Separate a column
bomber_combined <- as_tibble(readRDS("bomber_data/bomber_combined.rds"))
bomber_separate <- bomber_combined %>%
separate(AC,into = c("Type","MD"),sep=" ")
head(bomber_separate)
## # A tibble: 6 × 6
## Type MD FY Cost FH Gallons
## <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
Combine two columns
bomber_prefix <- as_tibble(readRDS("bomber_data/bomber_prefix.rds"))
bomber_unite <- bomber_prefix %>%
unite(MD,prefix,number,sep="-")
head(bomber_unite)
## # A tibble: 6 × 5
## Type MD FY Output Value
## <chr> <chr> <int> <chr> <int>
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
Clean and plot trend
bomber_mess <- as_tibble(readRDS("bomber_data/bomber_mess.rds"))
bomber_mess_clean <- bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("year","categories")) %>%
spread(key="categories",value="Value")
head(bomber_mess_clean)
## # A tibble: 6 × 6
## Type MD year Cost FH Gallons
## <chr> <chr> <chr> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("year","categories")) %>%
ggplot()+
geom_point(aes(x=year,y=Value,colour=MD,group=MD))+
geom_line(mapping= aes(x=year,y=Value,colour=MD,group=MD)) +
#geom_smooth()+
facet_wrap(~categories,ncol=1,scales = "free")
Join two relational tables and summarize
ws_categorization <- as_tibble(readRDS("bomber_data/ws_categorizations.rds"))
ws_programmatics <- as_tibble(readRDS("bomber_data/ws_programmatics.rds"))
question_6 <- ws_programmatics %>%
left_join(ws_categorization) %>%
filter(FY==2014,Base=="MINOT AFB (ND)",System %in% c("AIRCRAFT", "MISSILES")) %>%
group_by(System) %>%
summarize(sum_os=sum(Total_O.S,na.rm=TRUE),sum_end_strength=sum(End_Strength,na.rm=TRUE))
head(question_6)
## # A tibble: 2 × 3
## System sum_os sum_end_strength
## <chr> <dbl> <dbl>
## 1 AIRCRAFT 297398235 2023
## 2 MISSILES 112224909 1951
Join two relational tables, summarize and visualize results
ws_programmatics %>%
left_join(ws_categorization) %>%
filter(FY==2014) %>%
group_by(Base) %>%
summarize(CPFH=(sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE))) %>%
filter(CPFH != Inf) %>%
arrange(desc(CPFH)) %>%
top_n(1,CPFH)
## # A tibble: 1 × 2
## Base CPFH
## <chr> <dbl>
## 1 PATRICK AFB (FL) 178960
# Top 10 Base stations with highest Cost per Flying hours
ws_programmatics %>%
left_join(ws_categorization) %>%
filter(FY==2014) %>%
group_by(Base) %>%
summarize(CPFH=(sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE))) %>%
filter(CPFH != Inf) %>%
arrange(desc(CPFH)) %>%
top_n(10,CPFH) %>%
ggplot()+
geom_bar(mapping=aes(x=reorder(Base,-CPFH), y=CPFH),stat="identity",col="yellow")
Correlation between Total_O.S and End Strength for every year
ws_programmatics %>%
left_join(ws_categorization) %>%
ggplot()+
geom_point(mapping=aes(x=Total_O.S,y=End_Strength,colour=FY))+
#coord_cartesian(xlim=1000000000,expand=TRUE)+
facet_wrap(~FY)
Correlation between Total_O.S and End Strength for each system type
ws_programmatics %>%
left_join(ws_categorization) %>%
ggplot()+
geom_point(mapping=aes(x=Total_O.S,y=End_Strength,colour=System))+
facet_wrap(~System)
Correlation between Total_O.S and End Strength for top 12 base stations by cost
Top_base_cost <- ws_programmatics %>%
group_by(Base) %>%
summarize(Base_cost=sum(Total_O.S,na.rm=TRUE)) %>%
mutate(base_cost_rank= dense_rank(desc(Base_cost))) %>%
arrange(base_cost_rank)
ws_programmatics %>%
left_join(Top_base_cost,by="Base") %>%
filter(base_cost_rank<=12) %>%
ggplot()+
geom_point(mapping=aes(x=Total_O.S,y=End_Strength,colour=Base))+
facet_wrap(~Base)
Correlation between Total_O.S and End Strength for top 12 MD’s by cost
Top_MD_cost <- ws_programmatics %>%
group_by(MD) %>%
summarize(MD_cost=sum(Total_O.S,na.rm=TRUE)) %>%
mutate(MD_cost_rank= dense_rank(desc(MD_cost))) %>%
arrange(MD_cost_rank)
ws_programmatics %>%
left_join(Top_MD_cost,by="MD") %>%
filter(MD_cost_rank<=12) %>%
ggplot()+
geom_point(mapping=aes(x=Total_O.S,y=End_Strength,colour=MD))+
facet_wrap(~MD)