This is my report for week 5 assignment on managing dataframes, creating tibbles, tidying data and working with multiple dataframes
This packages contains multiple packages within it with can be used for data representation and manipulation
library(tidyverse)
library(tibble)
library(ggplot2)
The following datasets are used: bombers_wide.rds bombers_long.rds bombers_wide.rds bombers_combined.rds ws_programmatics.rds ws_categorizations.rds
setwd("C:/Users/Anitha/Documents/Data Wrangling with R (BANA 8090)/data/")
bomber_wide <- readRDS("bomber_wide.rds")
bomber_wide<-as_tibble(bomber_wide)
bomber_gather <-bomber_wide %>% gather(3:21,key="year", value="FH")
head(bomber_gather)
## # A tibble: 6 x 4
## Type MD year FH
## <chr> <chr> <chr> <int>
## 1 Bomber B-1 1996 26914
## 2 Bomber B-2 1996 2364
## 3 Bomber B-52 1996 28511
## 4 Bomber B-1 1997 25219
## 5 Bomber B-2 1997 2776
## 6 Bomber B-52 1997 26034
setwd("C:/Users/Anitha/Documents/Data Wrangling with R (BANA 8090)/data/")
bomber_long <- readRDS("bomber_long.rds")
bomber_long<-as_tibble(bomber_long)
bomber_spread <- bomber_long %>% spread(4, key="Output", value="Value")
head(bomber_spread)
## # A tibble: 6 x 6
## Type MD FY Cost FH Gallons
## <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
setwd("C:/Users/Anitha/Documents/Data Wrangling with R (BANA 8090)/data/")
bomber_combined <- readRDS("bomber_combined.rds")
bomber_combined <-as_tibble(bomber_combined)
bomber_separate <- bomber_combined %>% separate(AC, into = c("Type", "MD"), sep=6)
head(bomber_separate)
## # A tibble: 6 x 6
## Type MD FY Cost FH Gallons
## <chr> <chr> <int> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
setwd("C:/Users/Anitha/Documents/Data Wrangling with R (BANA 8090)/data/")
bomber_prefix <- readRDS("bomber_prefix.rds")
bomber_prefix<-as_tibble(bomber_prefix)
bomber_unite <- bomber_prefix %>% unite(MD,prefix,number, sep="-")
head(bomber_unite)
## # A tibble: 6 x 5
## Type MD FY Output Value
## <chr> <chr> <int> <chr> <int>
## 1 Bomber B-1 1996 FH 26914
## 2 Bomber B-1 1997 FH 25219
## 3 Bomber B-1 1998 FH 24205
## 4 Bomber B-1 1999 FH 23306
## 5 Bomber B-1 2000 FH 25013
## 6 Bomber B-1 2001 FH 25059
setwd("C:/Users/Anitha/Documents/Data Wrangling with R (BANA 8090)/data/")
bomber_mess <- readRDS("bomber_mess.rds")
bomber_mess <-as_tibble(bomber_mess)
bomber_mess_multiple <- bomber_mess %>% unite(MD,prefix,number,sep="-") %>% separate(Metric, into = c("Year","Output"),sep="_") %>%spread(key = Output, value = Value)
bomber_mess_multiple %>%
gather(`Cost`,`FH`,`Gallons`,key = "Metrics", value = "Value") %>%
transform(Year = as.numeric(Year)) %>%
ggplot(.) + geom_line(mapping = aes(x = Year, y = Value,group = MD,colour = MD))+
facet_wrap(~ Metrics, nrow = 3, scales = "free") +
scale_x_continuous(name="Year", breaks=seq(1996,2014,2))
head(bomber_mess_multiple)
## # A tibble: 6 x 6
## Type MD Year Cost FH Gallons
## <chr> <chr> <chr> <int> <int> <int>
## 1 Bomber B-1 1996 72753781 26914 88594449
## 2 Bomber B-1 1997 71297263 25219 85484074
## 3 Bomber B-1 1998 84026805 24205 85259038
## 4 Bomber B-1 1999 71848336 23306 79323816
## 5 Bomber B-1 2000 58439777 25013 86230284
## 6 Bomber B-1 2001 94946077 25059 86892432
setwd("C:/Users/Anitha/Documents/Data Wrangling with R (BANA 8090)/data/")
ws_programmatics <-readRDS("ws_programmatics.rds")
ws_programmatics <- as_tibble(ws_programmatics)
ws_categorization <-readRDS("ws_categorizations.rds")
ws_categorization<-as_tibble(ws_categorization)
ws_categorization %>%
inner_join(ws_programmatics, by = c("Base","MD")) %>%
filter(Base == "MINOT AFB (ND)",FY == 2014) %>%
filter(System %in% c("AIRCRAFT","MISSILES")) %>%
group_by(System) %>%
summarize(Sum_Total_OS = sum(Total_O.S,na.rm=TRUE),
End_Strength = sum(End_Strength,na.rm=TRUE))
## # A tibble: 2 x 3
## System Sum_Total_OS End_Strength
## <chr> <dbl> <dbl>
## 1 AIRCRAFT 297398235 2023
## 2 MISSILES 112224909 1951
ws_question7<-
ws_categorization %>%
inner_join(ws_programmatics, by = c("Base","MD")) %>%
select(Base,Total_O.S,FH)%>%
na.omit() %>%
group_by(Base) %>%
summarize(Sum_Total_OS = sum(Total_O.S,na.rm=TRUE),
SUm_FH = sum(FH,na.rm=TRUE)) %>%
mutate(CPFH = Sum_Total_OS / SUm_FH) %>%
select(Base,CPFH) %>%
arrange(desc(CPFH)) %>%
head(10)
ws_question7
## # A tibble: 10 x 2
## Base CPFH
## <chr> <dbl>
## 1 TINKER AFB (OK) 61780.13
## 2 ELLSWORTH AFB (SD) 39916.41
## 3 LANGLEY AFB (VA) 34467.86
## 4 MINOT AFB (ND) 31164.68
## 5 BARKSDALE AFB (LA) 30798.51
## 6 MOUNTAIN HOME AFB (ID) 25321.45
## 7 DYESS AFB (TX) 25216.68
## 8 HICKAM AFB (HI) 25090.59
## 9 LACKLAND AFB (TX) 23187.37
## 10 TYNDALL AFB (FL) 22426.92
ggplot(data = ws_question7) +
geom_bar(mapping = aes(x = Base, y = CPFH),stat = 'identity') +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_x_discrete(name ="Base",
limits=ws_question7$Base)
ws_programmatics %>%
select(End_Strength,Total_O.S,FY) %>%
na.omit() %>%
ggplot(.) +
geom_point(mapping = aes(x = End_Strength, y = Total_O.S))+
facet_wrap(~ FY, nrow = 5)