Synopsis

In this Week 5 homework, we work through several datasets, tidy them using various R functions, and reshape them into the required format. Finally, we visualize the relationships between a few variables in the datasets using ggplot2.

Packages Required

The following packages are required:

library (tidyverse) #To use dplyr for various functions through our analysis and ggplot for the visualization

Sourcecode

We work with 7 datasets here: ws_programmatics.rds, bomber_combined.rds, bomber_long.rds, bomber_mess.rds, bomber_prefix.rds, bomber_wide.rds, and ws_categorizations.rds.

Among them they contain several combinations of the variables, which are explained in the codebook below.

Link to the data codebook: http://uc-r.github.io/data_wrangling/week-5-assignment-data

Exercises

#1

# Load the wide bomber table and stack the columns `1996` through `2014`
# into key/value pairs: the former column name goes to Year, its cell to FH.
BW <- readRDS("bomber_wide.rds")
BWT <- as_tibble(BW)
gather(BWT, key = "Year", value = "FH", `1996`:`2014`)
## # A tibble: 57 × 4
##      Type    MD  Year    FH
##     <chr> <chr> <chr> <int>
## 1  Bomber   B-1  1996 26914
## 2  Bomber   B-2  1996  2364
## 3  Bomber  B-52  1996 28511
## 4  Bomber   B-1  1997 25219
## 5  Bomber   B-2  1997  2776
## 6  Bomber  B-52  1997 26034
## 7  Bomber   B-1  1998 24205
## 8  Bomber   B-2  1998  2166
## 9  Bomber  B-52  1998 25639
## 10 Bomber   B-1  1999 23306
## # ... with 47 more rows
#2

# Load the long bomber table and widen it: each distinct value of Output
# becomes its own column, filled from the matching Value entries.
BL <- readRDS("bomber_long.rds")
BLT <- as_tibble(BL)
spread(BLT, key = "Output", value = "Value")
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows
#3

# Load the combined bomber table and split the AC column on a single space
# into two columns named Type and MD.
BC <- readRDS("bomber_combined.rds")
BCT <- as_tibble(BC)
separate(BCT, AC, into = c("Type", "MD"), sep = " ")
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows
#4

# Load the prefix bomber table, glue `prefix` and `number` back together
# with a hyphen to form MD, then widen Output/Value into one column per
# Output value.
BP <- readRDS("bomber_prefix.rds")
BPT <- as_tibble(BP)
spread(
  unite(BPT, MD, prefix, number, sep = "-"),
  key = "Output",
  value = "Value"
)
## # A tibble: 57 × 6
##      Type    MD    FY      Cost    FH   Gallons
## *   <chr> <chr> <int>     <int> <int>     <int>
## 1  Bomber   B-1  1996  72753781 26914  88594449
## 2  Bomber   B-1  1997  71297263 25219  85484074
## 3  Bomber   B-1  1998  84026805 24205  85259038
## 4  Bomber   B-1  1999  71848336 23306  79323816
## 5  Bomber   B-1  2000  58439777 25013  86230284
## 6  Bomber   B-1  2001  94946077 25059  86892432
## 7  Bomber   B-1  2002  96458536 26581  89198262
## 8  Bomber   B-1  2003  68650070 21491  74485788
## 9  Bomber   B-1  2004 101895634 28118 101397707
## 10 Bomber   B-1  2005 124816690 21859  78410415
## # ... with 47 more rows
#5

# Exercise 5: tidy the "mess" bomber table, then plot every Output over time.
#
# Tidying steps:
#   1. unite()    - rebuild MD from `prefix` and `number` (hyphen-joined).
#   2. separate() - split Metric on "_" into FY and Output.
#                   convert = TRUE turns the year strings into integers so
#                   FY is numeric (matching the <int> FY of the other bomber
#                   tables) and the plot gets a continuous x axis instead of
#                   one discrete tick per year string.
#   3. spread()   - one column per Output value.
BM <- readRDS("bomber_mess.rds")
BMT <- as_tibble(BM)
BMTP <- BMT %>%
  unite(MD, prefix, number, sep = "-") %>%
  separate(Metric, into = c("FY", "Output"), sep = "_", convert = TRUE) %>%
  spread(key = "Output", value = "Value")

# Re-stack Cost, FH and Gallons so each can be drawn in its own facet;
# scales = "free" lets every facet use its own axis range.
BMTP %>%
  gather(Cost, FH, Gallons, key = "Output", value = "Value") %>%
  ggplot(aes(x = FY, y = Value, group = MD, colour = MD)) +
  geom_line() +
  facet_grid(Output ~ ., scales = "free")

#6

# Exercise 6: FY 2014 totals at Minot AFB for aircraft and missile systems.
WSCat <- as_tibble(readRDS("ws_categorizations.rds"))
WsProg <- as_tibble(readRDS("ws_programmatics.rds"))

# Missing Total_O.S / End_Strength values are treated as zero.
# NOTE: this overwrites WsProg itself, so every later use of WsProg in this
# file sees zeros (not NA) in these two columns.
WsProg$Total_O.S <- replace(WsProg$Total_O.S, is.na(WsProg$Total_O.S), 0)
WsProg$End_Strength <- replace(
  WsProg$End_Strength, is.na(WsProg$End_Strength), 0
)

# One row per System with a separate total for each variable. The previous
# mutate(Total = sum(Total_O.S, End_Strength)) added the two different
# columns into a single number and repeated it on every row, so the printed
# table that follows this chunk predates this change and will differ once
# the document is re-knit.
WsProg %>%
  left_join(WSCat, by = c("Base", "MD")) %>%
  filter(FY == 2014, Base == "MINOT AFB (ND)") %>%
  filter(System %in% c("AIRCRAFT", "MISSILES")) %>%
  group_by(System) %>%
  summarise(
    Total_O.S = sum(Total_O.S),
    End_Strength = sum(End_Strength)
  )
## Source: local data frame [8 x 20]
## Groups: System [2]
## 
##             Base     MD    FY Manpower_Ops Manpower_Mx
##            <chr>  <chr> <int>        <dbl>       <dbl>
## 1 MINOT AFB (ND)   B-52  2014     30526714    96851312
## 2 MINOT AFB (ND)    E-4  2014           NA       92794
## 3 MINOT AFB (ND)  GB-52  2014           NA          NA
## 4 MINOT AFB (ND) OC-135  2014           NA          NA
## 5 MINOT AFB (ND)   T-38  2014           NA          NA
## 6 MINOT AFB (ND)   UH-1  2014      3984555      277855
## 7 MINOT AFB (ND) AGM-86  2014           NA    19789965
## 8 MINOT AFB (ND) LGM-30  2014     31565144    31425933
## # ... with 15 more variables: Manpower_Support_Staff <dbl>,
## #   Operating_Material <dbl>, Mx_Consumables <dbl>, Mx_DLR <dbl>,
## #   Mx_Depot_AC <dbl>, Mx_Depot_Missile <dbl>, Mx_Depot_Engine <dbl>,
## #   CLS <dbl>, Total_O.S <dbl>, Avg_Inv <dbl>, TAI <dbl>,
## #   End_Strength <dbl>, FH <dbl>, System <chr>, Total <dbl>
#7

# Exercise 7: the ten bases with the highest FY 2014 cost per flying hour
# (CPFH = Total_O.S / FH), using each base's single highest-CPFH row.
WsProg %>%
  left_join(WSCat, by = c("Base", "MD")) %>%
  filter(FY == 2014) %>%
  mutate(CPFH = Total_O.S / FH) %>%
  arrange(desc(CPFH)) %>%
  group_by(Base) %>%
  # Name the ranking column explicitly instead of relying on top_n()
  # silently picking the last column ("Selecting by CPFH").
  top_n(n = 1, wt = CPFH) %>%
  # Drop the grouping so head() is plainly the first ten rows overall.
  ungroup() %>%
  head(10) %>%
  ggplot() +
  geom_bar(
    mapping = aes(x = reorder(Base, CPFH), y = CPFH),
    stat = "identity",
    colour = "darkblue",
    fill = "lightblue"
  ) +
  coord_flip() +
  # labs() names aesthetics, not screen positions: x is still Base after
  # coord_flip(), so the base label belongs on x and the cost label on y.
  labs(x = "Bases", y = "Cost per flying hour")

#8

# Exercise 8: scatter plots of Total_O.S against End_Strength on the joined
# programmatics + categorizations data.
#
# na.rm and fixed alpha values are passed to geom_point() itself. Inside
# aes() na.rm is not an aesthetic (ggplot2 warned "Ignoring unknown
# aesthetics: na.rm"), and a constant alpha inside aes() is mapped through a
# scale - adding a legend - rather than setting that opacity.
W <- WsProg %>%
  left_join(WSCat, by = c("Base", "MD"))

# Plain scatter.
ggplot(data = W) +
  geom_point(mapping = aes(x = End_Strength, y = Total_O.S), na.rm = TRUE)

# Coloured by fiscal year, treated as a discrete variable.
ggplot(data = W) +
  geom_point(
    mapping = aes(x = End_Strength, y = Total_O.S, color = factor(FY)),
    na.rm = TRUE
  )

# Coloured by System, with transparency to reduce overplotting.
ggplot(data = W) +
  geom_point(
    mapping = aes(x = End_Strength, y = Total_O.S, color = System),
    alpha = 0.25,
    na.rm = TRUE
  )

# Coloured by FH, with stronger transparency.
ggplot(data = W) +
  geom_point(
    mapping = aes(x = End_Strength, y = Total_O.S, color = FH),
    alpha = 0.10,
    na.rm = TRUE
  )