This HTML document is a report of the completion of 5th week’s assignment. Here I have learned a new concept “tibbles”, which made working with data frames easier. I have also organized my data in a tidy way as part of this assignment and managed the relational but separate data frames.
Following packages were used to perform the exercise:
library(ggplot2) #To produce graphs in R
library(tidyverse) #set of packages which share common data
bomber_wide <- readRDS("data/bomber_wide.rds")
bomber_wide <- as_tibble(bomber_wide)
bomber_wide <- gather(bomber_wide, "year", "FH", 3:21)
bomber_wide
## # A tibble: 57 × 4
## Type MD year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
## 7 Bomber B-1 1998 24205
## 8 Bomber B-2 1998 2166
## 9 Bomber B-52 1998 25639
## 10 Bomber B-1 1999 23306
## # ... with 47 more rows
bomber_long <- readRDS("data/bomber_long.rds")
bomber_long <- as_tibble(bomber_long)
bomber_long <- spread(bomber_long, Output, Value)
bomber_long
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_combined <- readRDS("data/bomber_combined.rds")
bomber_combined <- as_tibble(bomber_combined)
bomber_combined <- bomber_combined %>%
separate(AC, into = c("Type", "MD"), sep = " ")
bomber_combined
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_prefix <- readRDS("data/bomber_prefix.rds")
bomber_prefix <- as_tibble(bomber_prefix)
bomber_prefix <- bomber_prefix %>%
unite(MD, prefix, number, sep = "-") %>%
spread(Output, Value)
bomber_prefix
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_mess <- readRDS("data/bomber_mess.rds")
bomber_mess <- as_tibble(bomber_mess)
bomber_mess <- bomber_mess %>%
unite(MD, prefix, number, sep = "-") %>%
separate(Metric, into = c("FY", "Output"), sep = "_") %>%
spread(Output, Value)
bomber_mess
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <chr> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_mess <- bomber_mess %>% gather( Metric, Values, Cost:Gallons)
ggplot(data=bomber_mess, aes(x=FY, y=Values, group = MD, colour = MD)) + geom_line() + geom_point() + facet_grid(Metric~., scales = "free")
ws_programmatics <- readRDS("data/ws_programmatics.rds")
ws_programmatics <- as_tibble(ws_programmatics)
ws_categorizations <- readRDS("data/ws_categorizations.rds")
ws_categorizations <- as_tibble(ws_categorizations)
ws_prog_cat1 <- ws_programmatics %>%
inner_join(ws_categorizations, by = c("MD", "Base") ) %>%
filter(FY=="2014", Base=="MINOT AFB (ND)") %>%
filter(System=="AIRCRAFT" || "MISSILES") %>%
group_by(System) %>%
summarise(TotalOS=sum(Total_O.S,na.rm=TRUE),EndStrength=sum(End_Strength,na.rm=TRUE))
ws_prog_cat1
## # A tibble: 5 × 3
## System TotalOS EndStrength
## <chr> <dbl> <dbl>
## 1 AIRCRAFT 297398235 2023
## 2 MISSILES 112224909 1951
## 3 MUNITIONS 5916637 99
## 4 OTHER 4088474 66
## 5 SPACE 211031 5
ws_prog_cat7 <- ws_programmatics %>%
inner_join(ws_categorizations, by = c("MD", "Base") ) %>%
filter(FY=="2014") %>%
mutate(CPFH=Total_O.S/FH) %>%
arrange(desc(CPFH)) %>%
select(Base, CPFH)
ws_prog_cat7
## # A tibble: 1,617 × 2
## Base CPFH
## <chr> <dbl>
## 1 WRIGHT-PATTERSON AFB (OH) 348613.78
## 2 TINKER AFB (OK) 275482.78
## 3 HICKAM AFB (HI) 107316.76
## 4 WRIGHT-PATTERSON AFB (OH) 94500.40
## 5 BEALE AFB (CA) 90662.10
## 6 WRIGHT-PATTERSON AFB (OH) 85884.55
## 7 LANGLEY AFB (VA) 78604.00
## 8 OFFUTT AFB (NE) 57186.30
## 9 EDWARDS AFB (CA) 56329.55
## 10 OFFUTT AFB (NE) 55877.62
## # ... with 1,607 more rows
# Base which had the largest cost per flying hour
top_n(ws_prog_cat7, 1)
## Selecting by CPFH
## # A tibble: 1 × 2
## Base CPFH
## <chr> <dbl>
## 1 WRIGHT-PATTERSON AFB (OH) 348613.8
ggplot(head(ws_prog_cat7,10), aes(x=Base, y=CPFH)) +
geom_bar(stat = 'identity', color = 'green', fill='lightgreen')
ws_prog_cat_join <- ws_programmatics %>%
inner_join(ws_categorizations, by = c("MD", "Base"))
ws_prog_cat_join
## # A tibble: 36,328 × 19
## Base MD FY Manpower_Ops Manpower_Mx
## <chr> <chr> <int> <dbl> <dbl>
## 1 ALTUS AFB (OK) A-10 2007 NA NA
## 2 ALTUS AFB (OK) A-10 2008 NA NA
## 3 ALTUS AFB (OK) AT-38 1997 NA NA
## 4 ALTUS AFB (OK) AT-38 1998 NA NA
## 5 ALTUS AFB (OK) C-130 1998 NA NA
## 6 ALTUS AFB (OK) C-130 1999 NA NA
## 7 ALTUS AFB (OK) C-130 2000 NA NA
## 8 ALTUS AFB (OK) C-130 2002 NA NA
## 9 ALTUS AFB (OK) C-130 2003 NA NA
## 10 ALTUS AFB (OK) C-130 2004 NA 19912
## # ... with 36,318 more rows, and 14 more variables:
## # Manpower_Support_Staff <dbl>, Operating_Material <dbl>,
## # Mx_Consumables <dbl>, Mx_DLR <dbl>, Mx_Depot_AC <dbl>,
## # Mx_Depot_Missile <dbl>, Mx_Depot_Engine <dbl>, CLS <dbl>,
## # Total_O.S <dbl>, Avg_Inv <dbl>, TAI <dbl>, End_Strength <dbl>,
## # FH <dbl>, System <chr>
ggplot(ws_prog_cat_join, aes(x=End_Strength, y=Total_O.S))+
geom_point(aes(shape=factor(System)), color='brown', na.rm = TRUE)+
ggtitle("End Strength VS Total_O.S - System")
ggplot(ws_prog_cat_join, aes(x=End_Strength, y=Total_O.S))+
geom_point(aes(color=FY), na.rm = TRUE) +
ggtitle("End Strength VS Total_O.S - Year")
ggplot(ws_prog_cat_join, aes(x=End_Strength, y=Total_O.S))+
geom_point(aes(color=factor(Avg_Inv)), na.rm = TRUE, show.legend = FALSE) +
ggtitle("End Strength VS Total_O.S - Average Investment")