This HTML document is associated with the R Markdown file that is used to complete the week 4 assignment.This R markdown file made by me summarizes the packages I have used to complete my homework assignment. RMD file is actually a very helpful tool that summarizes the whole task. Our code along with their results get documented neatly. Relation data concepts were helpful to organize my analysis.
Following packages were installed and used:
library(ggplot2) #Package for multi-layered graphs in R
library(tidyverse) #Set of packages including dpylr and ggplot
library(rio) #Read RDS file
The markdown is based on seven data sets that contains information about United States Air Force bomber aircrafts. Following variables are parts of these data sets:
Type: Represents the type of Bomber aircrafts
MD: Combination of basic mission of the aircraft and the design number
FY: Fiscal year from 1996-2014
Cost: Represents the total operational cost for flying, maintaining, repairing, and managing the aircraft
FH: Represents the flying hours, which is the total time the aircraft is in the air flying a mission or in training
Gallons: Represents the total gallons of fuel burned by the aircraft
P.S: The data sets contains no missing values
bomber_wide <- import("bomber_wide.rds")
bomber_wide_2 <- bomber_wide %>% gather(3:21,key = "Year" ,value = "FH")
as_tibble(bomber_wide_2)
## # A tibble: 57 x 4
## Type MD Year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
## 7 Bomber B-1 1998 24205
## 8 Bomber B-2 1998 2166
## 9 Bomber B-52 1998 25639
## 10 Bomber B-1 1999 23306
## # ... with 47 more rows
#Counting missing values
sum(is.na(bomber_wide_2$FH))
## [1] 0
bomber_wide_2[!complete.cases(bomber_wide_2),]
## [1] Type MD Year FH
## <0 rows> (or 0-length row.names)
bomber_long <- as_tibble(import("bomber_long.rds"))
bomber_long_2 <- bomber_long %>% spread(key = "Output" ,value = "Value")
bomber_long_2
## # A tibble: 57 x 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_combined <- as_tibble(import("bomber_combined.rds"))
bomber_combined_2 <- bomber_combined %>% separate(AC,into = c("Type","MD"),sep=" ")
bomber_combined_2
## # A tibble: 57 x 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_prefix <- as_tibble(import("bomber_prefix.rds"))
bomber_prefix_2 <- bomber_prefix %>% unite(MD,prefix,number,sep="-")
bomber_prefix_2
## # A tibble: 171 x 5
## Type MD FY Output Value
## * <chr> <chr> <int> <chr> <int>
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
## 7 Bomber B-1 2002 FH 26581
## 8 Bomber B-1 2003 FH 21491
## 9 Bomber B-1 2004 FH 28118
## 10 Bomber B-1 2005 FH 21859
## # ... with 161 more rows
bomber_mess <- as_tibble(import("bomber_mess.rds"))
bomber_mess_2 <- bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("FY","Metric1"),sep="_") %>%
spread(key = "Metric1" ,value = "Value")
bomber_mess_2
## # A tibble: 57 x 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <chr> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_mess_3 <- bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("FY","Metric1"),sep="_")
ggplot(data=bomber_mess_3, aes(x=FY, y=Value,group=MD, colour = MD)) +
geom_line() + geom_point() + facet_grid(Metric1~., scales = "free") +
ggtitle("Historical data for mission aircrafts") +
labs(y="Value", x="Year")
ws_programmatics <- as_tibble(import("ws_programmatics.rds"))
ws_categorization <- as_tibble(import("ws_categorizations.rds"))
ws_temp <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
filter(FY == 2014 & Base == "MINOT AFB (ND)") %>%
filter(System=="AIRCRAFT" | System=="MISSILES") %>%
group_by(System) %>%
summarise(Total_Sum = sum(Total_O.S,na.rm=TRUE),Total_end_Strength = sum(End_Strength,na.rm=TRUE))
ws_temp
## # A tibble: 2 x 3
## System Total_Sum Total_end_Strength
## <chr> <dbl> <dbl>
## 1 AIRCRAFT 297398235 2023
## 2 MISSILES 112224909 1951
ws_temp_top10 <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
filter(FY == 2014) %>%
group_by(Base) %>%
summarise(CPFH = sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE)) %>%
filter(CPFH != Inf) %>%
arrange(desc(CPFH)) %>%
head(n=10) %>%
select(Base,CPFH)
ws_temp_top10 %>% ggplot() +
geom_bar(mapping = aes(x = reorder(Base,CPFH),y=CPFH,fill = Base),stat="Identity") +
coord_flip() +
ggtitle(" Top 10 bases with the largest cost per flying hours") +
labs(y="Cost/Flying Hours", x="Bases")
ws_FY <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
group_by(FY) %>%
summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
select(FY,cost,tot_ES)
ws_FY%>%
ggplot() +
geom_point(mapping = aes(x = tot_ES, y = cost,colour = FY))
ws_System <- ws_programmatics %>%
left_join(ws_categorization, by=c("Base","MD")) %>%
group_by(System) %>%
summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
select(System,cost,tot_ES)
ws_System%>%
ggplot() +
geom_point(mapping = aes(x = tot_ES, y = cost,colour = System))
ws_Base <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
group_by(Base) %>%
summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
select(Base,cost,tot_ES)
ws_Base%>%
ggplot() +
geom_point(mapping = aes(x = tot_ES, y = cost,colour = Base))