Synopsis

This R Markdown discusses various datasets pertaining to the logistics at US Air Force bases. The data dictionary can be found at Data

Packages Required

library(knitr) # To allow the use of code chunks in the Rmd File
library(tibble)
library(tidyr)
library(dplyr) # For Data Manipuation
library(ggplot2) # To create visualizations
library(scales)# To manipulate the scales on each of the ggplot graphs
library(grid) # To manipulate details about the grid containing multiple scatter plots
library(gridExtra) # Manipulation of the grid
library(magrittr) # To use other facets & operators working with dplyr
read_chunk("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Week_5.R") # Use the script for the Week 5 assignment

Delving into the various datasets and questions

Wide to Long Format

bmbr_wide <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_wide.rds")
as_tibble(bmbr_wide)
## # A tibble: 3 × 21
##     Type    MD `1996` `1997` `1998` `1999` `2000` `2001` `2002` `2003`
## *  <chr> <chr>  <int>  <int>  <int>  <int>  <int>  <int>  <int>  <int>
## 1 Bomber   B-1  26914  25219  24205  23306  25013  25059  26581  21491
## 2 Bomber   B-2   2364   2776   2166   3672   4543   4754   5969   6801
## 3 Bomber  B-52  28511  26034  25639  24500  24387  24813  36687  30230
## # ... with 11 more variables: `2004` <int>, `2005` <int>, `2006` <int>,
## #   `2007` <int>, `2008` <int>, `2009` <int>, `2010` <int>, `2011` <int>,
## #   `2012` <int>, `2013` <int>, `2014` <int>
bmbr_long <- bmbr_wide %>% gather(Year, FH, 3:ncol(bmbr_wide))
head(bmbr_long)
##     Type   MD Year    FH
## 1 Bomber  B-1 1996 26914
## 2 Bomber  B-2 1996  2364
## 3 Bomber B-52 1996 28511
## 4 Bomber  B-1 1997 25219
## 5 Bomber  B-2 1997  2776
## 6 Bomber B-52 1997 26034

Long to Wide Format

bomber_long <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_long.rds")
as_tibble(bomber_long)
## # A tibble: 171 × 5
##      Type    MD    FY Output Value
##     <chr> <chr> <int>  <chr> <int>
## 1  Bomber   B-1  1996     FH 26914
## 2  Bomber   B-1  1997     FH 25219
## 3  Bomber   B-1  1998     FH 24205
## 4  Bomber   B-1  1999     FH 23306
## 5  Bomber   B-1  2000     FH 25013
## 6  Bomber   B-1  2001     FH 25059
## 7  Bomber   B-1  2002     FH 26581
## 8  Bomber   B-1  2003     FH 21491
## 9  Bomber   B-1  2004     FH 28118
## 10 Bomber   B-1  2005     FH 21859
## # ... with 161 more rows
bomber_wide <- bomber_long %>% spread(Output,Value)
head(bomber_wide)
##     Type  MD   FY     Cost    FH  Gallons
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432

Use of separate function

bomber_combined <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_combined.rds")
as_tibble(bomber_combined)
## # A tibble: 57 × 5
##            AC    FY      Cost    FH   Gallons
## *       <chr> <int>     <int> <int>     <int>
## 1  Bomber B-1  1996  72753781 26914  88594449
## 2  Bomber B-1  1997  71297263 25219  85484074
## 3  Bomber B-1  1998  84026805 24205  85259038
## 4  Bomber B-1  1999  71848336 23306  79323816
## 5  Bomber B-1  2000  58439777 25013  86230284
## 6  Bomber B-1  2001  94946077 25059  86892432
## 7  Bomber B-1  2002  96458536 26581  89198262
## 8  Bomber B-1  2003  68650070 21491  74485788
## 9  Bomber B-1  2004 101895634 28118 101397707
## 10 Bomber B-1  2005 124816690 21859  78410415
## # ... with 47 more rows
bmbr_separate <- bomber_combined %>% separate(AC, c("Type","MD"), sep = " ")
head(bmbr_separate)
##     Type  MD   FY     Cost    FH  Gallons
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432

Use of the unite function

bomber_prefix <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_prefix.rds")
as_tibble(bomber_prefix)
## # A tibble: 171 × 6
##      Type prefix number    FY Output Value
## *   <chr>  <chr>  <chr> <int>  <chr> <int>
## 1  Bomber      B      1  1996     FH 26914
## 2  Bomber      B      1  1997     FH 25219
## 3  Bomber      B      1  1998     FH 24205
## 4  Bomber      B      1  1999     FH 23306
## 5  Bomber      B      1  2000     FH 25013
## 6  Bomber      B      1  2001     FH 25059
## 7  Bomber      B      1  2002     FH 26581
## 8  Bomber      B      1  2003     FH 21491
## 9  Bomber      B      1  2004     FH 28118
## 10 Bomber      B      1  2005     FH 21859
## # ... with 161 more rows
bmbr_prefix <- bomber_prefix %>% unite(MD,prefix,number,sep="-")
head(bmbr_prefix)
##     Type  MD   FY Output Value
## 1 Bomber B-1 1996     FH 26914
## 2 Bomber B-1 1997     FH 25219
## 3 Bomber B-1 1998     FH 24205
## 4 Bomber B-1 1999     FH 23306
## 5 Bomber B-1 2000     FH 25013
## 6 Bomber B-1 2001     FH 25059

Cleaning up the messy bomber data and visualizing

We clean up the data using a variety of operators and manipulations. Post that we look at historical trends facetted by Cost,FH and Gallons variables

bmbr_mess <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/bomber_mess.rds")
as_tibble(bmbr_mess)
## # A tibble: 171 × 5
##      Type prefix number  Metric Value
## *   <chr>  <chr>  <chr>   <chr> <int>
## 1  Bomber      B      1 1996_FH 26914
## 2  Bomber      B      1 1997_FH 25219
## 3  Bomber      B      1 1998_FH 24205
## 4  Bomber      B      1 1999_FH 23306
## 5  Bomber      B      1 2000_FH 25013
## 6  Bomber      B      1 2001_FH 25059
## 7  Bomber      B      1 2002_FH 26581
## 8  Bomber      B      1 2003_FH 21491
## 9  Bomber      B      1 2004_FH 28118
## 10 Bomber      B      1 2005_FH 21859
## # ... with 161 more rows
bmbr_clean <- bmbr_mess %>% unite(MD,prefix,number,sep="-") %>% 
  separate(Metric,c("FY","Output"),sep="_") %>%
  mutate(FY=as.Date(paste0(FY,"-01-01")))

ggplot(bmbr_clean,aes(x=FY,y=Value,group=MD,colour=MD)) + geom_line() + 
  facet_wrap(~Output,scales = "free",ncol=1) + 
  scale_x_date(breaks = date_breaks("3 years"),date_labels = "%Y") +
  scale_y_continuous(labels = unit_format("k",1e-6)) +
  ggtitle("Historical Trends for various metrics") +
  xlab("Year")

bmbr_clean <- bmbr_clean %>% spread(Output,Value)
head(bmbr_clean)
##     Type  MD         FY     Cost    FH  Gallons
## 1 Bomber B-1 1996-01-01 72753781 26914 88594449
## 2 Bomber B-1 1997-01-01 71297263 25219 85484074
## 3 Bomber B-1 1998-01-01 84026805 24205 85259038
## 4 Bomber B-1 1999-01-01 71848336 23306 79323816
## 5 Bomber B-1 2000-01-01 58439777 25013 86230284
## 6 Bomber B-1 2001-01-01 94946077 25059 86892432

Operational Cost & Head Count by System

ws_programmatics <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/ws_programmatics.rds")
ws_categorization <- readRDS("/Users/aditya/Desktop/CourseWork/Data_Wrangling/Week_5/Data/ws_categorizations.rds")

ws_final <- ws_programmatics %>% full_join(ws_categorization,by=c("MD","Base")) %>%
  filter(FY==2014 & Base=="MINOT AFB (ND)") %>%
  filter(System %in% c("AIRCRAFT","MISSILES")) %>%
  group_by(System) %>% 
  summarise(Tot_Sum = sum(Total_O.S, na.rm = T) + sum(End_Strength, na.rm = T))
ws_final
## # A tibble: 2 × 2
##     System   Tot_Sum
##      <chr>     <dbl>
## 1 AIRCRAFT 297400258
## 2 MISSILES 112226860

Cost per Flying Hour

ws_fnl <-  ws_programmatics %>% full_join(ws_categorization,by=c("MD","Base")) %>%
  filter(FY==2014) %>%
  group_by(Base) %>%
  select(Base,Total_O.S,FH) %>%
  summarise(Total_O.S=sum(Total_O.S,na.rm = T), FH=sum(FH,na.rm = T)) %>%
  filter(Total_O.S > 0 & FH > 0) %>%
  mutate(CPFH=Total_O.S/FH) %>%
  arrange(-CPFH)

top_base <- ws_fnl %>% top_n(n=10)
## Selecting by CPFH
ggplot(top_base, aes(x=reorder(Base,CPFH),y=CPFH)) + geom_bar(stat="identity",width = 0.5,fill = "#56B4E9") +
coord_flip() + scale_y_continuous(labels = comma) + ggtitle("Top 10 bases by Cost per Flying Hour") +
ylab("Cost per Flying Hour") + xlab("Air Force Base")

Base which has the highest Cost per Flying Hour is PATRICK AFB (FL) with the Cost per Flying Hour being $1.789610^{5}

Relationship between Head Count & Operational Cost

ws_scatter <- ws_programmatics %>% full_join(ws_categorization,by=c("MD","Base"))

p1 <- ggplot(ws_scatter,aes(x=Total_O.S,y=End_Strength)) + geom_point(aes(colour=System)) +
scale_x_continuous(labels = unit_format("MM",1e-6), limits = c(0,1100000000)) + ylab("Personnel") +
xlab("Operational Cost on the base") + 
ggtitle("Relationship between Operational Cost and Personnel")

p2 <- ggplot(ws_scatter,aes(x=Total_O.S,y=End_Strength)) + geom_point(aes(colour=FY)) +
scale_x_continuous(labels = unit_format("MM",1e-6),limits = c(0,1100000000))+ ylab("Personnel") +
xlab("Operational Cost on the base")

p3 <- ggplot(ws_scatter,aes(x=Total_O.S,y=End_Strength)) + geom_point(aes(colour=FH)) +
scale_x_continuous(labels = unit_format("MM",1e-6),limits = c(0,1100000000))+ ylab("Personnel") +
xlab("Operational Cost on the base")

grid.arrange(p1,p2,p3,ncol=1)

The correlation between Head Count & Total Operational Cost is 0.8210234