Week 5 Assignment

Angie Chen

November 12, 2016


Packages used in Assignment 5

library(tibble) # used to create tibbles
library(tidyr) # used to tidy up data
library(dplyr) # data manipulation
library(ggplot2) # used to visualize data
library(prettydoc) # document themes for R Markdown

1. Bomber_Wide RDS file - convert from wide to long

BWide <- as_tibble(readRDS("bomber_wide.rds"))
BLong <- gather(BWide, Year, FH, c(3:21), factor_key = TRUE)
head(BLong, n=5) # show first 5 rows of tibble
## # A tibble: 5 × 4
##     Type    MD   Year    FH
##    <chr> <chr> <fctr> <int>
## 1 Bomber   B-1   1996 26914
## 2 Bomber   B-2   1996  2364
## 3 Bomber  B-52   1996 28511
## 4 Bomber   B-1   1997 25219
## 5 Bomber   B-2   1997  2776

2. Bomber_Long RDS file - convert from long to wide

BLong2 <- as_tibble(readRDS("bomber_long.rds"))
BWide2 <- spread(BLong2, Output, Value)
head(BWide2, n=5) # show first 5 rows of tibble
## # A tibble: 5 × 6
##     Type    MD    FY     Cost    FH  Gallons
##    <chr> <chr> <int>    <int> <int>    <int>
## 1 Bomber   B-1  1996 72753781 26914 88594449
## 2 Bomber   B-1  1997 71297263 25219 85484074
## 3 Bomber   B-1  1998 84026805 24205 85259038
## 4 Bomber   B-1  1999 71848336 23306 79323816
## 5 Bomber   B-1  2000 58439777 25013 86230284

3. Bomber_Combined RDS file - split variable into two separate variables

BComb <- as_tibble(readRDS("bomber_combined.rds")) %>% 
  separate(AC, into = c("Type","MD"), sep = " ")
head(BComb, n=5) # show first 5 rows of tibble
## # A tibble: 5 × 6
##     Type    MD    FY     Cost    FH  Gallons
##    <chr> <chr> <int>    <int> <int>    <int>
## 1 Bomber   B-1  1996 72753781 26914 88594449
## 2 Bomber   B-1  1997 71297263 25219 85484074
## 3 Bomber   B-1  1998 84026805 24205 85259038
## 4 Bomber   B-1  1999 71848336 23306 79323816
## 5 Bomber   B-1  2000 58439777 25013 86230284

4. Bomber_Prefix RDS file - combine variables into one variable

BPrefix <- as_tibble(readRDS("bomber_prefix.rds")) %>%
  unite(MD, prefix, number, sep = "-")
head(BPrefix, n=5) # show first 5 rows of tibble
## # A tibble: 5 × 5
##     Type    MD    FY Output Value
##    <chr> <chr> <int>  <chr> <int>
## 1 Bomber   B-1  1996     FH 26914
## 2 Bomber   B-1  1997     FH 25219
## 3 Bomber   B-1  1998     FH 24205
## 4 Bomber   B-1  1999     FH 23306
## 5 Bomber   B-1  2000     FH 25013

5. Bomber_Mess RDS file - cleaning up data

Make the tibble contain the following variables:

  • Type
  • MD which combines the prefix and number variable (i.e. “B-1”)
  • FY which is the left part of the Metric variable
  • Cost which is captured in the right part of the Metric variable
  • FH which is captured in the right part of the Metric variable
  • Gallons which is captured in the right part of the Metric variable
BMess <- as_tibble(readRDS("bomber_mess.rds")) %>%
  unite(MD, prefix, number, sep = "-") %>%
  separate(Metric, into = c("FY", "Output"), sep = "_") %>%
  spread(Output, Value)
head(BMess, n=5) # show first 5 rows of tibble
## # A tibble: 5 × 6
##     Type    MD    FY     Cost    FH  Gallons
##    <chr> <chr> <chr>    <int> <int>    <int>
## 1 Bomber   B-1  1996 72753781 26914 88594449
## 2 Bomber   B-1  1997 71297263 25219 85484074
## 3 Bomber   B-1  1998 84026805 24205 85259038
## 4 Bomber   B-1  1999 71848336 23306 79323816
## 5 Bomber   B-1  2000 58439777 25013 86230284

6. Ws_Programmtics and Ws_Categorization RDS files - joining data sets

Perform the following:

  • Join the ws_categorization data to the ws_programmatics data
  • Filter for only FY 2014 data at the following Base: Minot AFB (ND)
  • Filter for only Systems classified as “AIRCRAFT” or “MISSILES”
  • Group the data by System level
  • Calculate the total sum of the Total_O.S and End_Strength variables
# import data sets as tibbles
WSCat <- as_tibble(readRDS("ws_categorizations.rds"))
WSProg <- as_tibble(readRDS("ws_programmatics.rds"))

# join data sets
WSJoin <- inner_join(WSCat, WSProg) 

# perform filters and calculations noted above
WSJoin %>% filter(Base == "MINOT AFB (ND)" & FY == 2014)%>%
  filter(System == "AIRCRAFT" | System == "MISSILES") %>%
  group_by(System) %>% 
  summarise(TotalOS_Sum = 
              sum(na.omit(Total_O.S)), 
              EndStrength_Sum = sum(na.omit(End_Strength)))
## # A tibble: 2 × 3
##     System TotalOS_Sum EndStrength_Sum
##      <chr>       <dbl>           <dbl>
## 1 AIRCRAFT   297398235            2023
## 2 MISSILES   112224909            1951

7. Calculate CPFH (Cost per Flying Hour)

Note: CPFH = total cost / flying hours = Total_O.S / FH

Base with the highest CPFH

# import data sets as tibbles
WSCat <- as_tibble(readRDS("ws_categorizations.rds"))
WSProg <- as_tibble(readRDS("ws_programmatics.rds"))

# join data sets
WSJoin <- inner_join(WSCat, WSProg) 

# create a new variable for CPFH
WSJoin$CPFH <- WSJoin$Total_O.S / WSJoin$FH

# find base with highest CPFH 
WSJoin %>% group_by(Base) %>%
  summarize(CPFH = sum(na.omit(CPFH))) %>% 
  arrange(desc(CPFH)) %>% top_n(1)
## # A tibble: 1 × 2
##                        Base     CPFH
##                       <chr>    <dbl>
## 1 WRIGHT-PATTERSON AFB (OH) 15567805

Top 10 Bases with Largest CPFH

WSJoinTen <- WSJoin %>% group_by(Base) %>%
  summarize(CPFH = sum(na.omit(CPFH))) %>%
  arrange(desc(CPFH)) %>% top_n(10)

ggplot(WSJoinTen, aes(Base, CPFH, fill = Base)) + 
  geom_bar(width = 0.4, 
           position = position_dodge(width=0.5), 
           stat = "identity") + 
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank())

8. Comparing End Strength and Total Costs

Assess the relationship between the end strength (End_Strength) variable and total costs (Total_O.S).

na.omit(WSJoin)
## # A tibble: 0 × 20
## # ... with 20 variables: Base <chr>, System <chr>, MD <chr>, FY <int>,
## #   Manpower_Ops <dbl>, Manpower_Mx <dbl>, Manpower_Support_Staff <dbl>,
## #   Operating_Material <dbl>, Mx_Consumables <dbl>, Mx_DLR <dbl>,
## #   Mx_Depot_AC <dbl>, Mx_Depot_Missile <dbl>, Mx_Depot_Engine <dbl>,
## #   CLS <dbl>, Total_O.S <dbl>, Avg_Inv <dbl>, TAI <dbl>,
## #   End_Strength <dbl>, FH <dbl>, CPFH <dbl>
# Plot of End Strength vs. Total Costs by System
ggplot(WSJoin, aes(Total_O.S, End_Strength, group = System, color = System)) + 
  geom_point() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) + 
  ggtitle("End Strength vs. Total Costs by System")

# Plot of End Strength vs. Total Costs by Average Inventory
ggplot(WSJoin, aes(Total_O.S, End_Strength, 
                   group = Avg_Inv, color = Avg_Inv)) +
  geom_point() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) + 
  ggtitle("End Strength vs. Total Costs by Average Inventory")

# Plot of End Strength vs. Total Costs by Cost of Manpower Operations
ggplot(WSJoin, aes(Total_O.S, End_Strength, 
                   group = Manpower_Ops, color = Manpower_Ops)) +
  geom_point() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) + 
  ggtitle("End Strength vs. Total Costs by Cost of Manpower Operations")