Synopsis

This is the week-5 homework for the BANA 8090 Data Wrangling With R course. This week focuses on tidy and relational data.

Packages required

The following packages are required for this homework

library(ggplot2)  #Package to produce complex multi-layered graphs in R
library(tidyverse) #Set of packages including dpylr and ggplot

Source Code

The markdown is based on seven data sets that contains information about United States Air Force bomber aircrafts. Following variables are parts of these data sets:

  1. Type: Represents the type of Bomber aircrafts

  2. MD: Combination of basic mission of the aircraft and the design number

  3. FY: Fiscal year from 1996-2014

  4. Cost: Represents the total operational cost for flying, maintaining, repairing, and managing the aircraft

  5. FH: Represents the flying hours, which is the total time the aircraft is in the air flying a mission or in training

  6. Gallons: Represents the total gallons of fuel burned by the aircraft

Note: The data sets contains no missing values or empty rows.

Data Analysis

Question 1

bomber_wide <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_wide.rds")
as_tibble(bomber_wide)
#Gather all the years from 1996 to 2014 into "Year" column and values in "FH" column
bomber_wide2 <- as_tibble(bomber_wide %>% 
                            gather("Year","FH",3:21))

#Counting complete cases
bomber_wide2[!complete.cases(bomber_wide2),]

Question 2

bomber_long <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_long.rds")
as_tibble(bomber_long)
#Spreading the output in "Output" column and associated Values
bomber_long2 <- as_tibble(bomber_long %>% 
                            spread(key="Output", value="Value"))
bomber_long2

Question 3

bomber_combined <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_combined.rds")
as_tibble(bomber_combined)
#Separating "AC" column into "Type" and "MD" columns
bomber_combined2 <- as_tibble(bomber_combined %>% 
                                separate(AC, into=c("Type","MD"),sep=" "))
bomber_combined2

Question 4

bomber_prefix <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_prefix.rds")
as_tibble(bomber_prefix)
#Combining "prefix"and "number" columns into "MD"column
bomber_prefix2 <- as_tibble(bomber_prefix %>% 
                              unite(MD,prefix,number,sep="-"))
bomber_prefix2

Question 5

bomber_mess <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/bomber_mess.rds")
as_tibble(bomber_mess)
bomber_mess2 <- as_tibble(bomber_mess %>%
                  unite(MD,prefix,number,sep="-") %>%
                    separate(Metric, into=c("FY","Metric_new"),sep="_") %>%
                      spread(key="Metric_new", value="Value") )

bomber_mess %>%
      unite(MD,prefix,number,sep="-") %>%
          separate(Metric, into=c("FY","Metric_new"),sep="_") %>%
            ggplot(aes(x=FY, y=Value, group=MD, color=MD)) +
              geom_line() +
                geom_point() +
                  facet_grid(Metric_new~., scales = "free") +
                    xlab("Year") +
                      theme(axis.text=element_text(size=6),
                            axis.title=element_text(size=6))

Question 6

ws_p <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/ws_programmatics.rds")
as_tibble(ws_p)
ws_c <- readRDS("C:/Manisha_Arora/UC-BANA/Sem1/Data Wrangling R - BB/Data Wrangling with R (BANA 8090)/Week-5/data/ws_categorizations.rds")
as_tibble(ws_c)
ws_p %>%
  full_join(ws_c) %>%
    subset(FY==2014 & Base=="MINOT AFB (ND)") %>%
      subset(System=="AIRCRAFT" | System=="MISSILES") %>%
        group_by(System) %>%
          summarise(sumTotal = sum(Total_O.S),
                    sumStrength = sum(End_Strength, na.rm=TRUE))

Question 7

ws_pc <- as_tibble(ws_p %>%
  full_join(ws_c) %>%
    group_by(Base) %>%
      summarize(max_cpfh = max(Total_O.S/FH,na.rm=TRUE)) %>%
        subset(max_cpfh != -Inf) %>% 
          arrange(desc(max_cpfh)))
ws_pc[1,]
ggplot(data=ws_pc[1:10,]) +
  geom_bar(mapping=aes(x=reorder(Base,max_cpfh), y=max_cpfh),stat = "identity") +
  coord_flip() +
  xlab("CPFH") + ylab("Base") +
  ggtitle("Plot of top 10 CPHF values")

Question 8

ws_p %>%
  full_join(ws_c) %>%
    gather("Metric","Value",c(FY,System,FH)) %>%
      group_by(Metric) %>%
        ggplot(mapping=aes(x=End_Strength,y=Total_O.S,group=Metric,color=Metric)) +
          geom_point() + facet_grid(~Metric)