This is the week-5 homework for the BANA 8090 Data Wrangling With R course. This week focuses on tidy and relational data.
The following packages are required for this homework
library(ggplot2) #Package to produce complex multi-layered graphs in R
library(tidyverse) #Set of packages including dpylr and ggplot
The markdown is based on seven data sets that contains information about United States Air Force bomber aircrafts. Following variables are parts of these data sets:
Type: Represents the type of Bomber aircrafts
MD: Combination of basic mission of the aircraft and the design number
FY: Fiscal year from 1996-2014
Cost: Represents the total operational cost for flying, maintaining, repairing, and managing the aircraft
FH: Represents the flying hours, which is the total time the aircraft is in the air flying a mission or in training
Gallons: Represents the total gallons of fuel burned by the aircraft
Note: The data sets contains no missing values or empty rows.
bomber_wide <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_wide.rds")
as_tibble(bomber_wide)
#Gather all the years from 1996 to 2014 into "Year" column and values in "FH" column
bomber_wide2 <- as_tibble(bomber_wide %>%
gather("Year","FH",3:21))
#Counting complete cases
bomber_wide2[!complete.cases(bomber_wide2),]
bomber_long <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_long.rds")
as_tibble(bomber_long)
#Spreading the output in "Output" column and associated Values
bomber_long2 <- as_tibble(bomber_long %>%
spread(key="Output", value="Value"))
bomber_long2
bomber_combined <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_combined.rds")
as_tibble(bomber_combined)
#Separating "AC" column into "Type" and "MD" columns
bomber_combined2 <- as_tibble(bomber_combined %>%
separate(AC, into=c("Type","MD"),sep=" "))
bomber_combined2
bomber_prefix <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_prefix.rds")
as_tibble(bomber_prefix)
#Combining "prefix"and "number" columns into "MD"column
bomber_prefix2 <- as_tibble(bomber_prefix %>%
unite(MD,prefix,number,sep="-"))
bomber_prefix2
bomber_mess <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_mess.rds")
as_tibble(bomber_mess)
bomber_mess2 <- as_tibble(bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric, into=c("FY","Metric_new"),sep="_") %>%
spread(key="Metric_new", value="Value") )
bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric, into=c("FY","Metric_new"),sep="_") %>%
ggplot(aes(x=FY, y=Value, group=MD, color=MD)) +
geom_line() +
geom_point() +
facet_grid(Metric_new~., scales = "free") +
xlab("Year") +
theme(axis.text=element_text(size=6),
axis.title=element_text(size=6))
ws_p <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/ws_programmatics.rds")
as_tibble(ws_p)
ws_c <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/ws_categorizations.rds")
as_tibble(ws_c)
ws_p %>%
full_join(ws_c) %>%
subset(FY==2014 & Base=="MINOT AFB (ND)") %>%
subset(System=="AIRCRAFT" | System=="MISSILES") %>%
group_by(System) %>%
summarise(sumTotal = sum(Total_O.S),
sumStrength = sum(End_Strength, na.rm=TRUE))
ws_pc <- as_tibble(ws_p %>%
full_join(ws_c) %>%
group_by(Base) %>%
summarize(max_cpfh = max(Total_O.S/FH,na.rm=TRUE)) %>%
subset(max_cpfh != -Inf) %>%
arrange(desc(max_cpfh)))
ws_pc[1,]
ggplot(data=ws_pc[1:10,]) +
geom_bar(mapping=aes(x=reorder(Base,max_cpfh), y=max_cpfh),stat = "identity") +
coord_flip() +
xlab("CPFH") + ylab("Base") +
ggtitle("Plot of top 10 CPHF values")
ws_p %>%
full_join(ws_c) %>%
gather("Metric","Value",c(FY,System,FH)) %>%
group_by(Metric) %>%
ggplot(mapping=aes(x=End_Strength,y=Total_O.S,group=Metric,color=Metric)) +
geom_point() + facet_grid(~Metric)