Week 5 Analysis

Synopsis

This HTML document is associated with the R Markdown file that is used to complete the week 4 assignment.This R markdown file made by me summarizes the packages I have used to complete my homework assignment. RMD file is actually a very helpful tool that summarizes the whole task. Our code along with their results get documented neatly. Relation data concepts were helpful to organize my analysis.

Packages Required

Following packages were installed and used:

library(ggplot2)  #Package for multi-layered graphs in R
library(tidyverse) #Set of packages including dpylr and ggplot
library(rio) #Read RDS file

Source Code

The markdown is based on seven data sets that contains information about United States Air Force bomber aircrafts. Following variables are parts of these data sets:

  1. Type: Represents the type of Bomber aircrafts

  2. MD: Combination of basic mission of the aircraft and the design number

  3. FY: Fiscal year from 1996-2014

  4. Cost: Represents the total operational cost for flying, maintaining, repairing, and managing the aircraft

  5. FH: Represents the flying hours, which is the total time the aircraft is in the air flying a mission or in training

  6. Gallons: Represents the total gallons of fuel burned by the aircraft

P.S: The data sets contains no missing values

Question 1

bomber_wide <- import("bomber_wide.rds")
bomber_wide_2 <- bomber_wide %>% gather(3:21,key = "Year" ,value = "FH")
as_tibble(bomber_wide_2)
## # A tibble: 57 x 4
##      Type    MD  Year    FH
##     <chr> <chr> <chr> <int>
## 1  Bomber   B-1  1996 26914
## 2  Bomber   B-2  1996  2364
## 3  Bomber  B-52  1996 28511
## 4  Bomber   B-1  1997 25219
## 5  Bomber   B-2  1997  2776
## 6  Bomber  B-52  1997 26034
## 7  Bomber   B-1  1998 24205
## 8  Bomber   B-2  1998  2166
## 9  Bomber  B-52  1998 25639
## 10 Bomber   B-1  1999 23306
## # ... with 47 more rows
#Counting missing values
sum(is.na(bomber_wide_2$FH))
## [1] 0
bomber_wide_2[!complete.cases(bomber_wide_2),]
## [1] Type MD   Year FH  
## <0 rows> (or 0-length row.names)

Question 2

bomber_long <- as_tibble(import("bomber_long.rds"))
bomber_long_2 <- bomber_long %>% spread(key = "Output" ,value = "Value")
bomber_long_2
## # A tibble: 57 x 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Question 3

bomber_combined <- as_tibble(import("bomber_combined.rds"))
bomber_combined_2 <- bomber_combined %>% separate(AC,into  = c("Type","MD"),sep=" ")
bomber_combined_2
## # A tibble: 57 x 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Question 4

bomber_prefix <- as_tibble(import("bomber_prefix.rds"))
bomber_prefix_2 <- bomber_prefix %>% unite(MD,prefix,number,sep="-")
bomber_prefix_2
## # A tibble: 171 x 5
##      Type    MD    FY Output Value
## *   <chr> <chr> <int>  <chr> <int>
## 1  Bomber   B-1  1996     FH 26914
## 2  Bomber   B-1  1997     FH 25219
## 3  Bomber   B-1  1998     FH 24205
## 4  Bomber   B-1  1999     FH 23306
## 5  Bomber   B-1  2000     FH 25013
## 6  Bomber   B-1  2001     FH 25059
## 7  Bomber   B-1  2002     FH 26581
## 8  Bomber   B-1  2003     FH 21491
## 9  Bomber   B-1  2004     FH 28118
## 10 Bomber   B-1  2005     FH 21859
## # ... with 161 more rows

Question 5

bomber_mess <- as_tibble(import("bomber_mess.rds"))
bomber_mess_2 <- bomber_mess %>%
                        unite(MD,prefix,number,sep="-") %>%
                        separate(Metric,into = c("FY","Metric1"),sep="_") %>%
                        spread(key = "Metric1" ,value = "Value")
bomber_mess_2
## # A tibble: 57 x 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <chr>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows
bomber_mess_3 <- bomber_mess %>%
                         unite(MD,prefix,number,sep="-") %>%
                         separate(Metric,into = c("FY","Metric1"),sep="_")

ggplot(data=bomber_mess_3, aes(x=FY, y=Value,group=MD, colour = MD)) + 
geom_line() + geom_point() + facet_grid(Metric1~., scales = "free") +
ggtitle("Historical data for mission aircrafts") +
labs(y="Value", x="Year")

Question 6

ws_programmatics <- as_tibble(import("ws_programmatics.rds"))
ws_categorization <- as_tibble(import("ws_categorizations.rds"))

ws_temp <- ws_programmatics %>% 
           left_join(ws_categorization,by=c("Base","MD")) %>%
           filter(FY == 2014  & Base == "MINOT AFB (ND)") %>%
           filter(System=="AIRCRAFT" | System=="MISSILES") %>%
           group_by(System) %>%
           summarise(Total_Sum = sum(Total_O.S,na.rm=TRUE),Total_end_Strength = sum(End_Strength,na.rm=TRUE))
ws_temp
## # A tibble: 2 x 3
##     System Total_Sum Total_end_Strength
##      <chr>     <dbl>              <dbl>
## 1 AIRCRAFT 297398235               2023
## 2 MISSILES 112224909               1951

Question 7

ws_temp_top10 <- ws_programmatics %>% 
                 left_join(ws_categorization,by=c("Base","MD")) %>%
                 filter(FY == 2014) %>%
                 group_by(Base) %>%
                 summarise(CPFH = sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE)) %>%
                 filter(CPFH != Inf) %>%
                 arrange(desc(CPFH)) %>%
                 head(n=10) %>%
                 select(Base,CPFH)

ws_temp_top10 %>% ggplot() +
                  geom_bar(mapping = aes(x = reorder(Base,CPFH),y=CPFH,fill = Base),stat="Identity") +
                  coord_flip() +
                  ggtitle(" Top 10 bases with the largest cost per flying hours") +
                  labs(y="Cost/Flying Hours", x="Bases")

Question 8

ws_FY <- ws_programmatics %>% 
  left_join(ws_categorization,by=c("Base","MD")) %>%
  group_by(FY) %>%
  summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
  select(FY,cost,tot_ES)

ws_FY%>%
ggplot() + 
  geom_point(mapping = aes(x = tot_ES, y = cost,colour = FY))

ws_System <- ws_programmatics %>% 
  left_join(ws_categorization, by=c("Base","MD")) %>%
  group_by(System) %>%
  summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
  select(System,cost,tot_ES)

ws_System%>%
  ggplot() + 
  geom_point(mapping = aes(x = tot_ES, y = cost,colour = System))

ws_Base <- ws_programmatics %>% 
  left_join(ws_categorization,by=c("Base","MD")) %>%
  group_by(Base) %>%
  summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
  select(Base,cost,tot_ES)

ws_Base%>%
  ggplot() + 
  geom_point(mapping = aes(x = tot_ES, y = cost,colour = Base))