This is my homework report for week 5, produced with R Markdown. In this homework I am working on tibbles and learning how clean data and making data tidy
library(dplyr)## used to manupulate data
library("tibble") ## used to create tibbles
library(rio) ## used to read rdm file
library(tidyverse)#group of packages used to summarise and visualize data
library(tidyr) ## functions which can be used to make data tidy
setwd("C:/tauseef/data_wrangling/Data Wrangling with R (BANA 8090)")
bomber_wide<-import("bomber_wide.rds")
bomber_wide_t<- as_tibble(bomber_wide)
bomber_wide_g<-bomber_wide_t %>%
gather(key="year",value="FH",match('1996':'2014', names(.)))
bomber_wide_g
## # A tibble: 57 × 4
## Type MD year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
## 7 Bomber B-1 1998 24205
## 8 Bomber B-2 1998 2166
## 9 Bomber B-52 1998 25639
## 10 Bomber B-1 1999 23306
## # ... with 47 more rows
bomber_long<-import("bomber_long.rds")
bomber_long_t<- as_tibble(bomber_long)
bomber_long_g<-bomber_long_t %>%
spread(key=Output,value=Value)
bomber_long_g
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
You can also embed plots, for example:
bomber_combined<-import("bomber_combined.rds")
bomber_combined_t<- as_tibble(bomber_combined)
bomber_combined_g<-bomber_combined_t %>%
separate(AC,into=c("Type","MD"),sep=' ',convert = TRUE)
bomber_combined_g
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
You can also embed plots, for example:
bomber_prefix<-import("bomber_prefix.rds")
bomber_prefix_t<- as_tibble(bomber_prefix)
bomber_prefix_g<-bomber_prefix_t %>%
unite(MD,prefix,number,sep='-')
bomber_prefix_g
## # A tibble: 171 × 5
## Type MD FY Output Value
## * <chr> <chr> <int> <chr> <int>
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
## 7 Bomber B-1 2002 FH 26581
## 8 Bomber B-1 2003 FH 21491
## 9 Bomber B-1 2004 FH 28118
## 10 Bomber B-1 2005 FH 21859
## # ... with 161 more rows
You can also embed plots, for example:
bomber_mess<-import("bomber_mess.rds")
bomber_mess_t<- as_tibble(bomber_mess)
bomber_mess_g<-bomber_mess_t %>%
unite(MD,prefix,number,sep='-')%>%
separate(Metric,into=c("FY","new"))%>%
spread(key=new,value=Value)
bomber_mess_g_temp <- bomber_mess %>%
unite(MD,prefix,number,sep="-") %>%
separate(Metric,into = c("FY","new"),sep="_")
ggplot(data=bomber_mess_g_temp, aes(x=FY, y=Value,group=MD, colour = MD)) +
geom_line() + geom_point() + facet_grid(new~., scales = "free") +
ggtitle("Historical trends of metrics for mission design aircrafts") +
labs(y="Value", x="Year")
bomber_mess_g
## # A tibble: 57 × 6
## Type MD FY Cost FH Gallons
## * <chr> <chr> <chr> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
## 7 Bomber B-1 2002 96458536 26581 89198262
## 8 Bomber B-1 2003 68650070 21491 74485788
## 9 Bomber B-1 2004 101895634 28118 101397707
## 10 Bomber B-1 2005 124816690 21859 78410415
## # ... with 47 more rows
ws_programmatics<-import("ws_programmatics.rds")
ws_programmatics_t<-as_tibble(ws_programmatics)
ws_categorizations<-import("ws_categorizations.rds")
ws_categorizations_t<-as_tibble(ws_categorizations)
ws_summary <- ws_programmatics_t %>%
left_join(ws_categorizations_t,by=c("Base","MD")) %>%
filter(FY == 2014 & Base == "MINOT AFB (ND)") %>%
filter(System=="AIRCRAFT" | System=="MISSILES") %>%
group_by(System) %>%
summarise(Total_Sum = sum(Total_O.S,na.rm=TRUE),Total_end_Strength = sum(End_Strength,na.rm=TRUE))
ws_summary
## # A tibble: 2 × 3
## System Total_Sum Total_end_Strength
## <chr> <dbl> <dbl>
## 1 AIRCRAFT 297398235 2023
## 2 MISSILES 112224909 1951
ws_top_10_base <- ws_programmatics_t %>%
left_join(ws_categorizations_t,by=c("Base","MD")) %>%
filter(FY == 2014) %>%
group_by(Base) %>%
summarise(CPFH = sum(Total_O.S,na.rm=TRUE)/sum(FH,na.rm=TRUE)) %>%
filter(CPFH != Inf) %>%
arrange(desc(CPFH)) %>%
head(n=10) %>%
select(Base,CPFH)
ws_top_10_base %>% ggplot() +
geom_bar(mapping = aes(x = reorder(Base,CPFH),y=CPFH,fill = Base),stat="Identity") +
coord_flip() +
ggtitle(" Top 10 bases with the largest cost per flying hours") +
labs(y="Cost/Flying Hours", x="Bases")
ws_FY <- ws_programmatics_t %>%
left_join(ws_categorizations_t,by=c("Base","MD")) %>%
group_by(FY) %>%
summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
select(FY,cost,tot_ES)
ws_FY%>%
ggplot() +
geom_point(mapping = aes(x = tot_ES, y = cost,colour = FY))+
labs(y="Total cost of operating", x="total personnel (head count) operating and supporting the weapon system ")
ws_System <- ws_programmatics_t %>%
left_join(ws_categorizations_t,by=c("Base","MD")) %>%
group_by(System) %>%
summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
select(System,cost,tot_ES)
ws_System%>%
ggplot() +
geom_point(mapping = aes(x = tot_ES, y = cost,colour = System))+
labs(y="Total cost of operating", x="total personnel (head count) operating and supporting the weapon system ")
ws_Base <- ws_programmatics_t %>%
left_join(ws_categorizations_t,by=c("Base","MD")) %>%
group_by(Base) %>%
summarise(cost=sum(Total_O.S,na.rm=TRUE),tot_ES=sum(End_Strength,na.rm=TRUE)) %>%
select(Base,cost,tot_ES)
ws_Base%>%
ggplot() +
geom_point(mapping = aes(y = tot_ES, x = cost,colour = Base))+
labs(y="Total cost of operating", x="total personnel (head count) operating and supporting the weapon system ")