Synopsis

This HTML document is a report of the completion of 5th week’s assignment. Here I have learned a new concept “tibbles”, which made working with data frames easier. I have also organized my data in a tidy way as part of this assignment and managed the relational but separate data frames.

Packages Required

Following packages were used to perform the exercise:

library(ggplot2)   #To produce graphs in R
library(tidyverse) #set of packages which share common data

Exercise on multiple datasets

Q.1

bomber_wide <- readRDS("data/bomber_wide.rds")
bomber_wide <- as_tibble(bomber_wide)
bomber_wide <- gather(bomber_wide, "year", "FH", 3:21)
bomber_wide
## # A tibble: 57 × 4
##      Type    MD  year    FH
##     <chr> <chr> <chr> <int>
## 1  Bomber   B-1  1996 26914
## 2  Bomber   B-2  1996  2364
## 3  Bomber  B-52  1996 28511
## 4  Bomber   B-1  1997 25219
## 5  Bomber   B-2  1997  2776
## 6  Bomber  B-52  1997 26034
## 7  Bomber   B-1  1998 24205
## 8  Bomber   B-2  1998  2166
## 9  Bomber  B-52  1998 25639
## 10 Bomber   B-1  1999 23306
## # ... with 47 more rows

Q.2

bomber_long <- readRDS("data/bomber_long.rds")
bomber_long <- as_tibble(bomber_long)
bomber_long <- spread(bomber_long, Output, Value)
bomber_long
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Q.3

bomber_combined <- readRDS("data/bomber_combined.rds")
bomber_combined <- as_tibble(bomber_combined)
bomber_combined <-  bomber_combined %>% 
                    separate(AC, into = c("Type", "MD"), sep = " ")
bomber_combined
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Q.4

bomber_prefix <- readRDS("data/bomber_prefix.rds")
bomber_prefix <- as_tibble(bomber_prefix)
bomber_prefix <-  bomber_prefix %>% 
                  unite(MD, prefix, number, sep = "-") %>% 
                  spread(Output, Value)
bomber_prefix
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Q.5

bomber_mess <- readRDS("data/bomber_mess.rds")
bomber_mess <- as_tibble(bomber_mess)
bomber_mess <-  bomber_mess %>% 
                unite(MD, prefix, number, sep = "-") %>% 
                separate(Metric, into = c("FY", "Output"), sep = "_") %>% 
                spread(Output, Value)  
bomber_mess
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <chr>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows
bomber_mess <- bomber_mess %>%  gather( Metric, Values, Cost:Gallons)
ggplot(data=bomber_mess, aes(x=FY, y=Values, group = MD, colour = MD)) + geom_line() + geom_point() + facet_grid(Metric~., scales = "free")

Q.6

ws_programmatics <- readRDS("data/ws_programmatics.rds")
ws_programmatics <- as_tibble(ws_programmatics)
ws_categorizations <- readRDS("data/ws_categorizations.rds")
ws_categorizations <- as_tibble(ws_categorizations)

ws_prog_cat1 <-  ws_programmatics %>% 
                inner_join(ws_categorizations, by = c("MD", "Base") ) %>% 
                filter(FY=="2014", Base=="MINOT AFB (ND)") %>% 
                filter(System=="AIRCRAFT" || "MISSILES") %>% 
                group_by(System) %>% 
                summarise(TotalOS=sum(Total_O.S,na.rm=TRUE),EndStrength=sum(End_Strength,na.rm=TRUE))
ws_prog_cat1
## # A tibble: 5 × 3
##      System   TotalOS EndStrength
##       <chr>     <dbl>       <dbl>
## 1  AIRCRAFT 297398235        2023
## 2  MISSILES 112224909        1951
## 3 MUNITIONS   5916637          99
## 4     OTHER   4088474          66
## 5     SPACE    211031           5

Q.7

ws_prog_cat7 <-   ws_programmatics %>% 
                  inner_join(ws_categorizations, by = c("MD", "Base") ) %>% 
                  filter(FY=="2014") %>% 
                  mutate(CPFH=Total_O.S/FH) %>%
                  arrange(desc(CPFH)) %>% 
                  select(Base, CPFH)
ws_prog_cat7
## # A tibble: 1,617 × 2
##                         Base      CPFH
##                        <chr>     <dbl>
## 1  WRIGHT-PATTERSON AFB (OH) 348613.78
## 2            TINKER AFB (OK) 275482.78
## 3            HICKAM AFB (HI) 107316.76
## 4  WRIGHT-PATTERSON AFB (OH)  94500.40
## 5             BEALE AFB (CA)  90662.10
## 6  WRIGHT-PATTERSON AFB (OH)  85884.55
## 7           LANGLEY AFB (VA)  78604.00
## 8            OFFUTT AFB (NE)  57186.30
## 9           EDWARDS AFB (CA)  56329.55
## 10           OFFUTT AFB (NE)  55877.62
## # ... with 1,607 more rows
# Base which had the largest cost per flying hour
top_n(ws_prog_cat7, 1)
## Selecting by CPFH
## # A tibble: 1 × 2
##                        Base     CPFH
##                       <chr>    <dbl>
## 1 WRIGHT-PATTERSON AFB (OH) 348613.8
ggplot(head(ws_prog_cat7,10), aes(x=Base, y=CPFH)) +
  geom_bar(stat = 'identity', color = 'green', fill='lightgreen')

Q.8

ws_prog_cat_join <-  ws_programmatics %>% 
  inner_join(ws_categorizations, by = c("MD", "Base"))
ws_prog_cat_join
## # A tibble: 36,328 × 19
##              Base    MD    FY Manpower_Ops Manpower_Mx
##             <chr> <chr> <int>        <dbl>       <dbl>
## 1  ALTUS AFB (OK)  A-10  2007           NA          NA
## 2  ALTUS AFB (OK)  A-10  2008           NA          NA
## 3  ALTUS AFB (OK) AT-38  1997           NA          NA
## 4  ALTUS AFB (OK) AT-38  1998           NA          NA
## 5  ALTUS AFB (OK) C-130  1998           NA          NA
## 6  ALTUS AFB (OK) C-130  1999           NA          NA
## 7  ALTUS AFB (OK) C-130  2000           NA          NA
## 8  ALTUS AFB (OK) C-130  2002           NA          NA
## 9  ALTUS AFB (OK) C-130  2003           NA          NA
## 10 ALTUS AFB (OK) C-130  2004           NA       19912
## # ... with 36,318 more rows, and 14 more variables:
## #   Manpower_Support_Staff <dbl>, Operating_Material <dbl>,
## #   Mx_Consumables <dbl>, Mx_DLR <dbl>, Mx_Depot_AC <dbl>,
## #   Mx_Depot_Missile <dbl>, Mx_Depot_Engine <dbl>, CLS <dbl>,
## #   Total_O.S <dbl>, Avg_Inv <dbl>, TAI <dbl>, End_Strength <dbl>,
## #   FH <dbl>, System <chr>
ggplot(ws_prog_cat_join, aes(x=End_Strength, y=Total_O.S))+
  geom_point(aes(shape=factor(System)), color='brown', na.rm = TRUE)+
  ggtitle("End Strength VS Total_O.S - System")

ggplot(ws_prog_cat_join, aes(x=End_Strength, y=Total_O.S))+
  geom_point(aes(color=FY), na.rm = TRUE) +
  ggtitle("End Strength VS Total_O.S - Year")

ggplot(ws_prog_cat_join, aes(x=End_Strength, y=Total_O.S))+
  geom_point(aes(color=factor(Avg_Inv)), na.rm = TRUE, show.legend = FALSE) +
  ggtitle("End Strength VS Total_O.S - Average Investment")