Synopsis

This is my homework report for week 5, produced with R Markdown. In this homework I am working on tibbles and learning how clean data and making data tidy

Packages Required

library(dplyr)## used to manupulate data
library("tibble") ## used to create tibbles
library(rio) ## used to read rdm file
library(tidyverse)#group of packages used to summarise and visualize data
library(tidyr) ## functions which can be used to make data tidy

Question 1

setwd("C:/tauseef/data_wrangling/Data Wrangling with R (BANA 8090)")
bomber_wide<-import("bomber_wide.rds")
bomber_wide_t<- as_tibble(bomber_wide)
bomber_wide_g<-bomber_wide_t %>% 
  gather(key="year",value="FH",match('1996':'2014', names(.)))
bomber_wide_g
## # A tibble: 57 × 4
##      Type    MD  year    FH
##     <chr> <chr> <chr> <int>
## 1  Bomber   B-1  1996 26914
## 2  Bomber   B-2  1996  2364
## 3  Bomber  B-52  1996 28511
## 4  Bomber   B-1  1997 25219
## 5  Bomber   B-2  1997  2776
## 6  Bomber  B-52  1997 26034
## 7  Bomber   B-1  1998 24205
## 8  Bomber   B-2  1998  2166
## 9  Bomber  B-52  1998 25639
## 10 Bomber   B-1  1999 23306
## # ... with 47 more rows

Question 2

bomber_long<-import("bomber_long.rds")
bomber_long_t<- as_tibble(bomber_long)
bomber_long_g<-bomber_long_t %>% 
  spread(key=Output,value=Value)
bomber_long_g
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Question 3

You can also embed plots, for example:

bomber_combined<-import("bomber_combined.rds")
bomber_combined_t<- as_tibble(bomber_combined)
bomber_combined_g<-bomber_combined_t %>% 
  separate(AC,into=c("Type","MD"),sep=' ',convert = TRUE)
bomber_combined_g
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Question 4

You can also embed plots, for example:

bomber_prefix<-import("bomber_prefix.rds")
bomber_prefix_t<- as_tibble(bomber_prefix)
bomber_prefix_g<-bomber_prefix_t %>% 
  unite(MD,prefix,number,sep='-')
bomber_prefix_g
## # A tibble: 171 × 5
##      Type    MD    FY Output Value
## *   <chr> <chr> <int>  <chr> <int>
## 1  Bomber   B-1  1996     FH 26914
## 2  Bomber   B-1  1997     FH 25219
## 3  Bomber   B-1  1998     FH 24205
## 4  Bomber   B-1  1999     FH 23306
## 5  Bomber   B-1  2000     FH 25013
## 6  Bomber   B-1  2001     FH 25059
## 7  Bomber   B-1  2002     FH 26581
## 8  Bomber   B-1  2003     FH 21491
## 9  Bomber   B-1  2004     FH 28118
## 10 Bomber   B-1  2005     FH 21859
## # ... with 161 more rows

Question 5

You can also embed plots, for example:

bomber_mess<-import("bomber_mess.rds")
bomber_mess_t<- as_tibble(bomber_mess)
bomber_mess_g<-bomber_mess_t %>% 
  unite(MD,prefix,number,sep='-')%>%
  separate(Metric,into=c("FY","new"))%>%
  spread(key=new,value=Value)

bomber_mess_g_temp <- bomber_mess %>%
  unite(MD,prefix,number,sep="-") %>%
  separate(Metric,into = c("FY","new"),sep="_")

ggplot(data=bomber_mess_g_temp, aes(x=FY, y=Value,group=MD, colour = MD)) + 
  geom_line() + geom_point() + facet_grid(new~., scales = "free") +
  ggtitle("Historical trends of metrics for mission design aircrafts") +
  labs(y="Value", x="Year")

  bomber_mess_g
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <chr>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows

Question 6

ws_programmatics<-import("ws_programmatics.rds")
ws_programmatics_t<-as_tibble(ws_programmatics)
ws_categorizations<-import("ws_categorizations.rds")

ws_categorizations_t<-as_tibble(ws_categorizations)


ws_summary <- ws_programmatics_t %>% 
  left_join(ws_categorizations_t,by=c("Base","MD")) %>%
  filter(FY == 2014  & Base == "MINOT AFB (ND)") %>%
  filter(System=="AIRCRAFT" | System=="MISSILES") %>%
  group_by(System) %>%
  summarise(Total_Sum = sum(Total_O.S,na.rm=TRUE),Total_end_Strength = sum(End_Strength,na.rm=TRUE))
ws_summary
## # A tibble: 2 × 3
##     System Total_Sum Total_end_Strength
##      <chr>     <dbl>              <dbl>
## 1 AIRCRAFT 297398235               2023
## 2 MISSILES 112224909               1951

Question 7

ws_top_10_base <- ws_programmatics_t %>% 
  left_join(ws_categorizations_t,by=c("Base","MD")) %>%
  filter(FY == 2014) %>%
  group_by(Base) %>%
  summarise(CPFH = sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE)) %>%
  filter(CPFH != Inf) %>%
  arrange(desc(CPFH)) %>%
  head(n=10) %>%
  select(Base,CPFH)

ws_top_10_base %>% ggplot() +
  geom_bar(mapping = aes(x = reorder(Base,CPFH),y=CPFH,fill = Base),stat="Identity") +
  coord_flip() +
  ggtitle(" Top 10 bases with the largest cost per flying hours") +
  labs(y="Cost/Flying Hours", x="Bases")

Question 8

ws_FY <- ws_programmatics_t %>% 
  left_join(ws_categorizations_t,by=c("Base","MD")) %>%
  group_by(FY) %>%
  summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
  select(FY,cost,tot_ES)


ws_FY%>%
ggplot() + 
  geom_point(mapping = aes(x = tot_ES, y = cost,colour = FY))+
  labs(y="Total cost of operating", x="total personnel (head count) operating and supporting the weapon system ")

ws_System <- ws_programmatics_t %>% 
  left_join(ws_categorizations_t,by=c("Base","MD")) %>%
  group_by(System) %>%
  summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
  select(System,cost,tot_ES)


ws_System%>%
  ggplot() + 
  geom_point(mapping = aes(x = tot_ES, y = cost,colour = System))+
labs(y="Total cost of operating", x="total personnel (head count) operating and supporting the weapon system ")

ws_Base <- ws_programmatics_t %>% 
  left_join(ws_categorizations_t,by=c("Base","MD")) %>%
  group_by(Base) %>%
  summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
  select(Base,cost,tot_ES)

ws_Base%>%
  ggplot() + 
  geom_point(mapping = aes(y = tot_ES, x = cost,colour = Base))+
labs(y="Total cost of operating", x="total personnel (head count) operating and supporting the weapon system ")