In this Week 5 homework, we work through various datasets, tidy them using functions in R, and reshape them into the required format. Finally, we visualize the relationships between a few variables in the datasets using ggplot2.
The following packages are required:
library (tidyverse) #To use dplyr for various functions through our analysis and ggplot for the visualization
We work with 7 datasets here: ws_programmatics.rds, bomber_combined.rds, bomber_long.rds, bomber_mess.rds, bomber_prefix.rds, bomber_wide.rds, and ws_categorizations.rds.
Between them, these datasets contain several combinations of the variables, which are explained in the codebook below.
Link to the data codebook: http://uc-r.github.io/data_wrangling/week-5-assignment-data
#1
# Read the wide bomber table and keep a tibble copy of it.
BW <- readRDS("bomber_wide.rds")
BWT <- as_tibble(BW)
# Stack the year columns 1996 through 2014 into Year/FH pairs
# (one row per remaining-column combination and year).
gather(BWT, key = Year, value = FH, `1996`:`2014`)
## # A tibble: 57 × 4
## Type MD Year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
## 7 Bomber B-1 1998 24205
## 8 Bomber B-2 1998 2166
## 9 Bomber B-52 1998 25639
## 10 Bomber B-1 1999 23306
## # ... with 47 more rows
#2
# Read the long bomber table and keep a tibble copy of it.
BL <- readRDS("bomber_long.rds")
BLT <- as_tibble(BL)
# Give every distinct value of Output its own column, filled from Value.
spread(BLT, key = "Output", value = "Value")
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
#3
# Read the combined bomber table and keep a tibble copy of it.
BC <- readRDS("bomber_combined.rds")
BCT <- as_tibble(BC)
# Split AC on a single space into the Type and MD columns.
separate(BCT, col = AC, into = c("Type", "MD"), sep = " ")
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
#4
# Read the prefix bomber table and keep a tibble copy of it.
BP <- readRDS("bomber_prefix.rds")
BPT <- as_tibble(BP)
# Step 1: glue prefix and number together with "-" to form MD.
# Step 2: give every distinct value of Output its own column, filled from Value.
spread(
  unite(BPT, col = MD, prefix, number, sep = "-"),
  key = "Output",
  value = "Value"
)
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
#5
# Tidy the "mess" bomber table, then draw one line per MD for each output.
BM <- readRDS("bomber_mess.rds")
BMT <- as_tibble(BM)
BMTP <- BMT %>%
  # Glue prefix and number together with "-" to form MD.
  unite(MD, prefix, number, sep = "-") %>%
  # Split Metric on "_" into FY and Output. convert = TRUE type-converts the
  # new columns, so a numeric-looking FY becomes an integer instead of staying
  # a character string; the plot below then gets a continuous year axis rather
  # than one discrete tick per year label.
  separate(Metric, into = c("FY", "Output"), sep = "_", convert = TRUE) %>%
  # Give every distinct value of Output its own column, filled from Value.
  spread(key = "Output", value = "Value")
BMTP %>%
  # Back to long form so each output can be drawn in its own facet.
  gather(Cost, FH, Gallons, key = "Output", value = "Value") %>%
  ggplot(aes(x = FY, y = Value, group = MD, colour = MD)) +
  geom_line() +
  # One row of panels per output; free scales let each panel use its own range.
  facet_grid(Output ~ ., scales = "free")
#6
# Load the categorization and programmatics tables as tibbles.
WSCat <- readRDS("ws_categorizations.rds") %>% as_tibble()
WsProg <- readRDS("ws_programmatics.rds") %>% as_tibble()
# Zero-fill missing Total_O.S and End_Strength values directly in WsProg.
# This modifies WsProg for every statement that uses it afterwards.
WsProg$Total_O.S[is.na(WsProg$Total_O.S)] <- 0
WsProg$End_Strength[is.na(WsProg$End_Strength)] <- 0
# Keep every WsProg row and attach the matching WSCat columns, then narrow to
# FY 2014 at MINOT AFB (ND) for the AIRCRAFT and MISSILES systems.
left_join(WsProg, WSCat, by = c("Base", "MD")) %>%
  filter(
    FY == 2014,
    Base == "MINOT AFB (ND)",
    System %in% c("AIRCRAFT", "MISSILES")
  ) %>%
  group_by(System) %>%
  # mutate() keeps every row and repeats the per-System total on each of them.
  # NOTE(review): Total adds Total_O.S and End_Strength into one figure per
  # System -- confirm a single combined number is what is wanted.
  mutate(Total = sum(Total_O.S, End_Strength))
## Source: local data frame [8 x 20]
## Groups: System [2]
##
## Base MD FY Manpower_Ops Manpower_Mx
## <chr> <chr> <int> <dbl> <dbl>
## 1 MINOT AFB (ND) B-52 2014 30526714 96851312
## 2 MINOT AFB (ND) E-4 2014 NA 92794
## 3 MINOT AFB (ND) GB-52 2014 NA NA
## 4 MINOT AFB (ND) OC-135 2014 NA NA
## 5 MINOT AFB (ND) T-38 2014 NA NA
## 6 MINOT AFB (ND) UH-1 2014 3984555 277855
## 7 MINOT AFB (ND) AGM-86 2014 NA 19789965
## 8 MINOT AFB (ND) LGM-30 2014 31565144 31425933
## # ... with 15 more variables: Manpower_Support_Staff <dbl>,
## # Operating_Material <dbl>, Mx_Consumables <dbl>, Mx_DLR <dbl>,
## # Mx_Depot_AC <dbl>, Mx_Depot_Missile <dbl>, Mx_Depot_Engine <dbl>,
## # CLS <dbl>, Total_O.S <dbl>, Avg_Inv <dbl>, TAI <dbl>,
## # End_Strength <dbl>, FH <dbl>, System <chr>, Total <dbl>
#7
# Bar chart of the highest cost per flying hour (CPFH) by base for FY 2014.
WsProg %>%
  left_join(., WSCat, by = c("Base", "MD")) %>%
  filter(FY == 2014) %>%
  mutate(CPFH = Total_O.S / FH) %>%
  arrange(desc(CPFH)) %>%
  group_by(Base) %>%
  # Keep the highest-CPFH row(s) within each base. Naming wt explicitly avoids
  # depending on CPFH happening to be the last column (and the
  # "Selecting by CPFH" message that the implicit form prints).
  top_n(n = 1, wt = CPFH) %>%
  ungroup() %>%
  # Rows are still in descending CPFH order, so this keeps the first ten.
  head(10) %>%
  ggplot() +
  geom_bar(
    mapping = aes(x = reorder(Base, CPFH), y = CPFH),
    stat = "identity",
    colour = "darkblue",
    fill = "lightblue"
  ) +
  coord_flip() +
  # labs() names aesthetics, not screen positions: x is still Base and y is
  # still CPFH after coord_flip(), so the titles must follow the aesthetics.
  labs(x = "Bases", y = "Cost per flying hour")
#8
# Scatter plots of Total_O.S against End_Strength on the joined table.
W <- WsProg %>%
  left_join(., WSCat, by = c("Base", "MD"))

# na.rm is a layer argument, not an aesthetic: inside aes() it is ignored with
# an "Ignoring unknown aesthetics" warning, so it is passed to geom_point().
# A constant alpha is likewise set outside aes(); inside aes() it is treated as
# a mapped variable (legend entry, rescaled value) instead of the requested
# transparency.

# Plain relationship.
ggplot(data = W) +
  geom_point(mapping = aes(x = End_Strength, y = Total_O.S), na.rm = TRUE)

# Coloured by fiscal year, treated as a discrete variable.
ggplot(data = W) +
  geom_point(
    mapping = aes(x = End_Strength, y = Total_O.S, color = factor(FY)),
    na.rm = TRUE
  )

# Coloured by System, with semi-transparent points.
ggplot(data = W) +
  geom_point(
    mapping = aes(x = End_Strength, y = Total_O.S, color = System),
    alpha = 0.25,
    na.rm = TRUE
  )

# Coloured by FH, with more transparent points.
ggplot(data = W) +
  geom_point(
    mapping = aes(x = End_Strength, y = Total_O.S, color = FH),
    alpha = 0.10,
    na.rm = TRUE
  )