Synopsis
This HTML document is created from the associated R Markdown file. In this assignment, I have organized data in various tibbles, turning them into more userful form using functions of tidyverse package. I have also done operations on relational data to perform analysis on more than one tables at one time.
Packages Required
Package(s) used in this assignment to exceute R code are mentioned below:
library(ggplot2) #Package to produce complex multi-layered graphs in R
library(tidyverse) #Set of packages including dpylr and ggplotSource Code
The markdown is based on seven data sets that contains information about United States Air Force bomber aircrafts. Following variables are parts of these data sets:
Type: Represents the type of Bomber aircrafts
MD: Combination of basic mission of the aircraft and the design number
FY: Fiscal year from 1996-2014
Cost: Represents the total operational cost for flying, maintaining, repairing, and managing the aircraft
FH: Represents the flying hours, which is the total time the aircraft is in the air flying a mission or in training
Gallons: Represents the total gallons of fuel burned by the aircraft
Note: The data sets contains no missing values or empty rows.
Data Analysis
Analysis 1
bomber_wide <- readRDS("bomber_wide.rds")
#Gather all the years from 1996 to 2014 into "Year" column and values in "FH" column
bomber_wide_modified <- bomber_wide %>% gather(3:21,key = "Year" ,value = "FH")
as_tibble(bomber_wide_modified)## # A tibble: 57 × 4
## Type MD Year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
## 7 Bomber B-1 1998 24205
## 8 Bomber B-2 1998 2166
## 9 Bomber B-52 1998 25639
## 10 Bomber B-1 1999 23306
## # ... with 47 more rows
#Counting missing values
sum(is.na(bomber_wide_modified$FH))## [1] 0
# See what all rows have incomplete data
bomber_wide_modified[!complete.cases(bomber_wide_modified),]## [1] Type MD Year FH
## <0 rows> (or 0-length row.names)
Analysis 2
bomber_long <- as_tibble(readRDS("bomber_long.rds"))
#Spreading the output in "Output" column and associated Values
bomber_long_modified <- bomber_long %>% spread(key = "Output" ,value = "Value")
bomber_long_modified## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
Analysis 3
bomber_combined <- as_tibble(readRDS("bomber_combined.rds"))
#Separating "AC" column into "Type" and "MD" columns
bomber_combined_modified <- bomber_combined %>% separate(AC,into = c("Type","MD"),sep=" ")
bomber_combined_modified## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
Analysis 4
bomber_prefix <- as_tibble(readRDS("bomber_prefix.rds"))
#Combining "prefix"and "number" columns into "MD"column
bomber_prefix_modified <- bomber_prefix %>% unite(MD,prefix,number,sep="-")
bomber_prefix_modified## # A tibble: 171 × 5
## Type MD FY Output Value
## * <chr> <chr> <int> <chr> <int>
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
## 7 Bomber B-1 2002 FH 26581
## 8 Bomber B-1 2003 FH 21491
## 9 Bomber B-1 2004 FH 28118
## 10 Bomber B-1 2005 FH 21859
## # ... with 161 more rows
Analysis 5
bomber_mess <- as_tibble(readRDS("bomber_mess.rds"))
bomber_mess_modified <- bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("FY","Metric1"),sep="_") %>%
spread(key = "Metric1" ,value = "Value")
bomber_mess_modified## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <chr> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
bomber_mess_modified1 <- bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("FY","Metric1"),sep="_")
ggplot(data=bomber_mess_modified1, aes(x=FY, y=Value,group=MD, colour = MD)) +
geom_line() + geom_point() + facet_grid(Metric1~., scales = "free") +
ggtitle("Historical trends of metrics for mission design aircrafts") +
labs(y="Value", x="Year") +
theme(axis.text=element_text(size=6),axis.title=element_text(size=6))Analysis 6
ws_programmatics <- as_tibble(readRDS("ws_programmatics.rds"))
ws_categorization <- as_tibble(readRDS("ws_categorizations.rds"))
ws_temp <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
filter(FY == 2014 & Base == "MINOT AFB (ND)") %>%
filter(System=="AIRCRAFT" | System=="MISSILES") %>%
group_by(System) %>%
summarise(Total_Sum = sum(Total_O.S,na.rm=TRUE),Total_end_Strength = sum(End_Strength,na.rm=TRUE))
ws_temp## # A tibble: 2 × 3
## System Total_Sum Total_end_Strength
## <chr> <dbl> <dbl>
## 1 AIRCRAFT 297398235 2023
## 2 MISSILES 112224909 1951
Analysis 7
ws_temp_top10 <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
filter(FY == 2014) %>%
group_by(Base) %>%
summarise(CPFH = sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE)) %>%
filter(CPFH != Inf) %>%
arrange(desc(CPFH)) %>%
head(n=10) %>%
select(Base,CPFH)
ws_temp_top10 %>% ggplot() +
geom_bar(mapping = aes(x = reorder(Base,CPFH),y=CPFH,fill = Base),stat="Identity") +
coord_flip() +
ggtitle(" Top 10 bases with the largest cost per flying hours") +
labs(y="Cost/Flying Hours", x="Bases")Analysis 8
ws_join <- ws_programmatics %>%
left_join(ws_categorization,by=c("Base","MD")) %>%
gather("Metric","Value",c(FY,FH,System)) %>%
group_by(Metric) %>%
ggplot(mapping=aes(x=End_Strength,y=Total_O.S,group=Metric,color=Metric)) +
geom_point() +
facet_grid(~Metric) +
labs(y="Total O.S", x="End Strength")
ws_join