This R Markdown discusses various datasets pertaining to the logistics at US Air Force bases. The data dictionary can be found at Data
library(knitr) # To allow the use of code chunks in the Rmd File
library(tibble)
library(tidyr)
library(dplyr) # For Data Manipuation
library(ggplot2) # To create visualizations
library(scales)# To manipulate the scales on each of the ggplot graphs
library(grid) # To manipulate details about the grid containing multiple scatter plots
library(gridExtra) # Manipulation of the grid
library(magrittr) # To use other facets & operators working with dplyr
read_chunk("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Week_5.R") # Use the script for the Week 5 assignment
bmbr_wide <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_wide.rds")
as_tibble(bmbr_wide)
## # A tibble: 3 × 21
## Type MD `1996` `1997` `1998` `1999` `2000` `2001` `2002` `2003`
## * <chr> <chr> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Bomber B-1 26914 25219 24205 23306 25013 25059 26581 21491
## 2 Bomber B-2 2364 2776 2166 3672 4543 4754 5969 6801
## 3 Bomber B-52 28511 26034 25639 24500 24387 24813 36687 30230
## # ... with 11 more variables: `2004` <int>, `2005` <int>, `2006` <int>,
## # `2007` <int>, `2008` <int>, `2009` <int>, `2010` <int>, `2011` <int>,
## # `2012` <int>, `2013` <int>, `2014` <int>
bmbr_long <- bmbr_wide %>% gather(Year, FH, 3:ncol(bmbr_wide))
head(bmbr_long)
## Type MD Year FH
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
bomber_long <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_long.rds")
as_tibble(bomber_long)
## # A tibble: 171 × 5
## Type MD FY Output Value
## <chr> <chr> <int> <chr> <int>
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
## 7 Bomber B-1 2002 FH 26581
## 8 Bomber B-1 2003 FH 21491
## 9 Bomber B-1 2004 FH 28118
## 10 Bomber B-1 2005 FH 21859
## # ... with 161 more rows
bomber_wide <- bomber_long %>% spread(Output,Value)
head(bomber_wide)
## Type MD FY Cost FH Gallons
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
bomber_combined <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_combined.rds")
as_tibble(bomber_combined)
## # A tibble: 57 × 5
## AC FY Cost FH Gallons
## * <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bmbr_separate <- bomber_combined %>% separate(AC, c("Type","MD"), sep = " ")
head(bmbr_separate)
## Type MD FY Cost FH Gallons
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
bomber_prefix <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_prefix.rds")
as_tibble(bomber_prefix)
## # A tibble: 171 × 6
## Type prefix number FY Output Value
## * <chr> <chr> <chr> <int> <chr> <int>
## 1 Bomber B 1 1996 FH 26914
## 2 Bomber B 1 1997 FH 25219
## 3 Bomber B 1 1998 FH 24205
## 4 Bomber B 1 1999 FH 23306
## 5 Bomber B 1 2000 FH 25013
## 6 Bomber B 1 2001 FH 25059
## 7 Bomber B 1 2002 FH 26581
## 8 Bomber B 1 2003 FH 21491
## 9 Bomber B 1 2004 FH 28118
## 10 Bomber B 1 2005 FH 21859
## # ... with 161 more rows
bmbr_prefix <- bomber_prefix %>% unite(MD,prefix,number,sep="-")
head(bmbr_prefix)
## Type MD FY Output Value
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
We clean up the data using a variety of operators and manipulations. Post that we look at historical trends facetted by Cost,FH and Gallons variables
bmbr_mess <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_mess.rds")
as_tibble(bmbr_mess)
## # A tibble: 171 × 5
## Type prefix number Metric Value
## * <chr> <chr> <chr> <chr> <int>
## 1 Bomber B 1 1996_FH 26914
## 2 Bomber B 1 1997_FH 25219
## 3 Bomber B 1 1998_FH 24205
## 4 Bomber B 1 1999_FH 23306
## 5 Bomber B 1 2000_FH 25013
## 6 Bomber B 1 2001_FH 25059
## 7 Bomber B 1 2002_FH 26581
## 8 Bomber B 1 2003_FH 21491
## 9 Bomber B 1 2004_FH 28118
## 10 Bomber B 1 2005_FH 21859
## # ... with 161 more rows
bmbr_clean <- bmbr_mess %>% unite(MD,prefix,number,sep="-") %>%
separate(Metric,c("FY","Output"),sep="_") %>%
mutate(FY=as.Date(paste0(FY,"-01-01")))
ggplot(bmbr_clean,aes(x=FY,y=Value,group=MD,colour=MD)) + geom_line() +
facet_wrap(~Output,scales = "free",ncol=1) +
scale_x_date(breaks = date_breaks("3 years"),date_labels = "%Y") +
scale_y_continuous(labels = unit_format("k",1e-6)) +
ggtitle("Historical Trends for various metrics") +
xlab("Year")
bmbr_clean <- bmbr_clean %>% spread(Output,Value)
head(bmbr_clean)
## Type MD FY Cost FH Gallons
## 1 Bomber B-1 1996-01-01 72753781 26914 88594449
## 2 Bomber B-1 1997-01-01 71297263 25219 85484074
## 3 Bomber B-1 1998-01-01 84026805 24205 85259038
## 4 Bomber B-1 1999-01-01 71848336 23306 79323816
## 5 Bomber B-1 2000-01-01 58439777 25013 86230284
## 6 Bomber B-1 2001-01-01 94946077 25059 86892432
ws_programmatics <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/ws_programmatics.rds")
ws_categorization <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/ws_categorizations.rds")
ws_final <- ws_programmatics %>% full_join(ws_categorization,by=c("MD","Base")) %>%
filter(FY==2014 & Base=="MINOT AFB (ND)") %>%
filter(System %in% c("AIRCRAFT","MISSILES")) %>%
group_by(System) %>%
summarise(Tot_Sum = sum(Total_O.S, na.rm = T) + sum(End_Strength, na.rm = T))
ws_final
## # A tibble: 2 × 2
## System Tot_Sum
## <chr> <dbl>
## 1 AIRCRAFT 297400258
## 2 MISSILES 112226860
ws_fnl <- ws_programmatics %>% full_join(ws_categorization,by=c("MD","Base")) %>%
filter(FY==2014) %>%
group_by(Base) %>%
select(Base,Total_O.S,FH) %>%
summarise(Total_O.S=sum(Total_O.S,na.rm = T), FH=sum(FH,na.rm = T)) %>%
filter(Total_O.S > 0 & FH > 0) %>%
mutate(CPFH=Total_O.S/FH) %>%
arrange(-CPFH)
top_base <- ws_fnl %>% top_n(n=10)
## Selecting by CPFH
ggplot(top_base, aes(x=reorder(Base,CPFH),y=CPFH)) + geom_bar(stat="identity",width = 0.5,fill = "#56B4E9") +
coord_flip() + scale_y_continuous(labels = comma) + ggtitle("Top 10 bases by Cost per Flying Hour") +
ylab("Cost per Flying Hour") + xlab("Air Force Base")
Base which has the highest Cost per Flying Hour is PATRICK AFB (FL) with the Cost per Flying Hour being $1.789610^{5}
ws_scatter <- ws_programmatics %>% full_join(ws_categorization,by=c("MD","Base"))
p1 <- ggplot(ws_scatter,aes(x=Total_O.S,y=End_Strength)) + geom_point(aes(colour=System)) +
scale_x_continuous(labels = unit_format("MM",1e-6), limits = c(0,1100000000)) + ylab("Personnel") +
xlab("Operational Cost on the base") +
ggtitle("Relationship between Operational Cost and Personnel")
p2 <- ggplot(ws_scatter,aes(x=Total_O.S,y=End_Strength)) + geom_point(aes(colour=FY)) +
scale_x_continuous(labels = unit_format("MM",1e-6),limits = c(0,1100000000))+ ylab("Personnel") +
xlab("Operational Cost on the base")
p3 <- ggplot(ws_scatter,aes(x=Total_O.S,y=End_Strength)) + geom_point(aes(colour=FH)) +
scale_x_continuous(labels = unit_format("MM",1e-6),limits = c(0,1100000000))+ ylab("Personnel") +
xlab("Operational Cost on the base")
grid.arrange(p1,p2,p3,ncol=1)
The correlation between Head Count & Total Operational Cost is 0.8210234