# Load the wide-format flight counts (one row per carrier/status pair, one
# column per destination city) straight from GitHub.
# fileEncoding = "UTF-8-BOM" strips the byte-order mark at the start of the
# file; without it the mark is read as part of the first header cell and the
# first column ends up with the garbled name "ï..".
airlines <- read.csv(
  "https://raw.githubusercontent.com/jhumms/DATA607/main/assignment-5/airlines.csv",
  fileEncoding = "UTF-8-BOM"
)
head(airlines)
# NOTE(review): header line below re-typed for the BOM-free read (the two
# unnamed header cells should come through as X and X.1) -- re-knit to confirm.
## X X.1 Los.Angeles Pheonix San.Diego San.Fransisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 delayed 62 12 20 102 305
## 3 NA NA NA NA NA
## 4 AM WEST on time 694 4840 383 320 201
## 5 delayed 117 415 65 129 61
# Drop spacer rows: a row is removed only when every cell is NA or an empty
# string. The mask is a logical matrix, so a vectorised row count replaces the
# row-by-row apply().
is_blank <- is.na(airlines) | airlines == ""
airlines <- airlines[rowSums(is_blank) < ncol(is_blank), ]

# Name the two unlabeled leading columns by position, then normalise every
# header to lower snake_case (e.g. "Los.Angeles" -> "los_angeles").
# A single rename_with() covers the lower-casing, so the superseded
# rename_all(tolower) pass is no longer needed.
airlines <- airlines %>%
  rename(carrier = 1, status = 2) %>%
  rename_with(~ tolower(gsub(".", "_", .x, fixed = TRUE)))
(Unfortunately, fill() does not work in this scenario because you are working with strings instead of numbers; thanks to IRTFM on Stack Overflow for the answer.)
# The carrier name is only written on the first row of each pair; the blank
# cells below it mean "same carrier as above". Turn blanks into NA first so
# they can be filled.
airlines[airlines == ""] <- NA

# Carry the last seen value forward down the first column.
# na.rm = FALSE keeps the result the same length as the column even if the
# first row is blank; the default (na.rm = TRUE) drops leading NAs, so the
# vector would no longer match the number of rows.
airlines[[1]] <- zoo::na.locf(airlines[[1]], na.rm = FALSE)
Let’s take a look at the data now:
# Preview the cleaned table: headers are snake_case and every row now carries
# its carrier name.
head(airlines, n = 6L)
## carrier status los_angeles pheonix san_diego san_fransisco seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 4 AM WEST on time 694 4840 383 320 201
## 5 AM WEST delayed 117 415 65 129 61
Much Better!
library("reshape2")
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Reshape from wide (one column per destination) to long (one row per
# carrier / status / destination). The id columns and the output column names
# are spelled out instead of letting melt() guess the ids and then renaming
# columns 3 and 4 by position, so the result no longer depends on column order.
melt.airlines <- melt(
  airlines,
  id.vars = c("carrier", "status"),
  variable.name = "destination",
  value.name = "amt"
)
head(melt.airlines)
## carrier status destination amt
## 1 ALASKA on time los_angeles 497
## 2 ALASKA delayed los_angeles 62
## 3 AM WEST on time los_angeles 694
## 4 AM WEST delayed los_angeles 117
## 5 ALASKA on time pheonix 221
## 6 ALASKA delayed pheonix 12
# Flight counts per carrier, split by status.
# melt.airlines holds one row per destination, so each carrier/status pair
# has several rows. Drawn as dodged identity bars, those rows land on top of
# each other and only the tallest is visible. Sum over destinations first so
# each bar shows the carrier's total.
melt.airlines %>%
  group_by(carrier, status) %>%
  summarise(amt = sum(amt), .groups = "drop") %>%
  ggplot(aes(x = carrier, y = amt, fill = status)) +
  geom_col(position = position_dodge())
# Total flights per carrier and status, summed over all destinations.
# group_by() already keeps only the grouping and summarised columns, so no
# select() is needed; .groups = "drop" returns an ungrouped result and
# silences the grouping message.
delays <- melt.airlines %>%
  group_by(carrier, status) %>%
  summarise(amt = sum(amt), .groups = "drop")

# Spread status into columns (one row per carrier), naming the formula terms
# and the value column explicitly rather than relying on dcast()'s guess.
# "on time" becomes on_time so it can be used as a bare column name.
# perc_delayed is a proportion in [0, 1], not a percentage.
cast.delays <- delays %>%
  dcast(carrier ~ status, value.var = "amt") %>%
  rename_with(~ gsub(" ", "_", .x, fixed = TRUE)) %>%
  mutate(perc_delayed = delayed / (delayed + on_time))
# Overall share of delayed flights, one bar per carrier.
cast.delays %>%
  ggplot(aes(x = carrier, y = perc_delayed)) +
  geom_col()
# One row per carrier/destination with the status counts side by side.
# The formula terms and value column are named explicitly instead of using
# `...` and dcast()'s value-column guess. "on time" becomes on_time so it can
# be used as a bare column name.
# perc_delayed is the delayed share of all flights on that route, rounded to
# two decimals (a proportion in [0, 1], not a percentage).
cast.airlines <- melt.airlines %>%
  dcast(carrier + destination ~ status, value.var = "amt") %>%
  rename_with(~ gsub(" ", "_", .x, fixed = TRUE)) %>%
  mutate(perc_delayed = round(delayed / (on_time + delayed), 2))

# Keep only the columns needed for plotting the per-destination delay share.
on_time_airlines <- cast.airlines %>%
  select(carrier, destination, perc_delayed)
# Share of delayed flights per destination, carriers shown side by side.
on_time_airlines %>%
  ggplot(aes(x = destination, y = perc_delayed, fill = carrier)) +
  geom_col(position = "dodge")