library(tidyverse)
library(openintro)
library(tidyr)
library(dplyr)
library(tidyverse)
library(ggplot2)

Insert any text here.

flights <- read.csv("C:\\Users\\tanzi\\OneDrive\\DATA\\607\\Week5\\tanzildata.csv ")

library(readr)
# Read the CSV file directly from the URL
flights <- read.csv("https://raw.githubusercontent.com/tanzil64/Data-607-Assignment-05/main/tanzildata.csv")

# Display the dataset
print(flights)
##        X     X.1 Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time         497     221       212           503   1,841
## 2        delayed          62      12        20           102     305
## 3                         NA                NA            NA        
## 4 AMWEST on time         694   4,840       383           320     201
## 5        delayed         117     415        65           129      61
#provide the name for the cols
colnames(flights)[1] <- "airline"
colnames(flights)[2] <- "status"
df <-data.frame(flights)
df
##   airline  status Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503   1,841
## 2         delayed          62      12        20           102     305
## 3                          NA                NA            NA        
## 4  AMWEST on time         694   4,840       383           320     201
## 5         delayed         117     415        65           129      61
#Remove the na rows
df <- drop_na(df)
df
##   airline  status Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503   1,841
## 2         delayed          62      12        20           102     305
## 3  AMWEST on time         694   4,840       383           320     201
## 4         delayed         117     415        65           129      61
# Fill the airlines
df[df==""]<-NA
df<- fill(df, airline)
df
##   airline  status Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503   1,841
## 2  ALASKA delayed          62      12        20           102     305
## 3  AMWEST on time         694   4,840       383           320     201
## 4  AMWEST delayed         117     415        65           129      61
df1<- gather(df, key = 'city', value = 'frequency', 3:7)
df1
##    airline  status          city frequency
## 1   ALASKA on time   Los.Angeles       497
## 2   ALASKA delayed   Los.Angeles        62
## 3   AMWEST on time   Los.Angeles       694
## 4   AMWEST delayed   Los.Angeles       117
## 5   ALASKA on time       Phoenix       221
## 6   ALASKA delayed       Phoenix        12
## 7   AMWEST on time       Phoenix     4,840
## 8   AMWEST delayed       Phoenix       415
## 9   ALASKA on time     San.Diego       212
## 10  ALASKA delayed     San.Diego        20
## 11  AMWEST on time     San.Diego       383
## 12  AMWEST delayed     San.Diego        65
## 13  ALASKA on time San.Francisco       503
## 14  ALASKA delayed San.Francisco       102
## 15  AMWEST on time San.Francisco       320
## 16  AMWEST delayed San.Francisco       129
## 17  ALASKA on time       Seattle     1,841
## 18  ALASKA delayed       Seattle       305
## 19  AMWEST on time       Seattle       201
## 20  AMWEST delayed       Seattle        61
df1 <- spread(df1, status, frequency)
df1
##    airline          city delayed on time
## 1   ALASKA   Los.Angeles      62     497
## 2   ALASKA       Phoenix      12     221
## 3   ALASKA     San.Diego      20     212
## 4   ALASKA San.Francisco     102     503
## 5   ALASKA       Seattle     305   1,841
## 6   AMWEST   Los.Angeles     117     694
## 7   AMWEST       Phoenix     415   4,840
## 8   AMWEST     San.Diego      65     383
## 9   AMWEST San.Francisco     129     320
## 10  AMWEST       Seattle      61     201
#Drop "." from the city name
df1$city <- str_replace_all(df1$city, "\\.", " ")
df1
##    airline          city delayed on time
## 1   ALASKA   Los Angeles      62     497
## 2   ALASKA       Phoenix      12     221
## 3   ALASKA     San Diego      20     212
## 4   ALASKA San Francisco     102     503
## 5   ALASKA       Seattle     305   1,841
## 6   AMWEST   Los Angeles     117     694
## 7   AMWEST       Phoenix     415   4,840
## 8   AMWEST     San Diego      65     383
## 9   AMWEST San Francisco     129     320
## 10  AMWEST       Seattle      61     201
library(dplyr)
library(ggplot2)

library(dplyr)
library(ggplot2)

# Ensure the 'delayed' column is numeric
df1$delayed <- as.numeric(df1$delayed)

# Summarize the total number of delayed flights by airline
delayed_summary <- df1 %>%
  group_by(airline) %>%
  summarise(total_delayed = sum(delayed, na.rm = TRUE))

# Print the summary
print(delayed_summary)
## # A tibble: 2 × 2
##   airline total_delayed
##   <chr>           <dbl>
## 1 ALASKA            501
## 2 AMWEST            787
# Create a bar plot of the total number of delayed flights by airline
ggplot(delayed_summary, aes(x = airline, y = total_delayed, fill = airline)) +
  geom_bar(stat = "identity") +
  labs(title = "Total Number of Delayed Flights by Airline", x = "Airline", y = "Total Delayed Flights") +
  theme_minimal()

colnames(df1)[4] <- "on_time"

df1 <-data.frame(df1)
df1
##    airline          city delayed on_time
## 1   ALASKA   Los Angeles      62     497
## 2   ALASKA       Phoenix      12     221
## 3   ALASKA     San Diego      20     212
## 4   ALASKA San Francisco     102     503
## 5   ALASKA       Seattle     305   1,841
## 6   AMWEST   Los Angeles     117     694
## 7   AMWEST       Phoenix     415   4,840
## 8   AMWEST     San Diego      65     383
## 9   AMWEST San Francisco     129     320
## 10  AMWEST       Seattle      61     201
library(dplyr)
library(ggplot2)

# Ensure the 'on_time' column is numeric
df1$on_time <- as.numeric(gsub("_", "", df1$on_time))
## Warning: NAs introduced by coercion
# Summarize the total number of on-time flights by airline
on_time_summary <- df1 %>%
  group_by(airline) %>%
  summarise(total_on_time = sum(on_time, na.rm = TRUE))

# Print the summary
print(on_time_summary)
## # A tibble: 2 × 2
##   airline total_on_time
##   <chr>           <dbl>
## 1 ALASKA           1433
## 2 AMWEST           1598
# Create a bar plot of the total number of on-time flights by airline
ggplot(on_time_summary, aes(x = airline, y = total_on_time, fill = airline)) +
  geom_bar(stat = "identity") +
  labs(title = "Total Number of On-Time Flights by Airline", x = "Airline", y = "Total On-Time Flights") +
  theme_minimal()

summary(df1)
##    airline              city              delayed          on_time     
##  Length:10          Length:10          Min.   : 12.00   Min.   :201.0  
##  Class :character   Class :character   1st Qu.: 61.25   1st Qu.:218.8  
##  Mode  :character   Mode  :character   Median : 83.50   Median :351.5  
##                                        Mean   :128.80   Mean   :378.9  
##                                        3rd Qu.:126.00   3rd Qu.:498.5  
##                                        Max.   :415.00   Max.   :694.0  
##                                                         NA's   :2
library(ggplot2)
library(tidyr)

# Sample data frame
df1 <- data.frame(
  x = c("AMWEST", "delayed", "ALASKA", NA, "delayed"),
  x_1 = c("on time", "delayed", "on time", NA, "delayed"),
  los_angeles = c(694, 117, 497, NA, 62),
  phoenix = c(4840, 415, 221, NA, 12),
  san_diego = c(383, 65, 212, NA, 20),
  san_francisco = c(320, 129, 503, NA, 102),
  seattle = c(201, 61, 1841, NA, 305)
)

# Reshape the data frame to long format
df_long <- df1 %>%
  pivot_longer(cols = c(los_angeles, san_diego, san_francisco), 
               names_to = "city", 
               values_to = "delay")

# Filter out rows with NA values in 'x_1' or 'delay'
df_long <- df_long %>% 
  filter(!is.na(x_1) & !is.na(delay))

# Create the ggplot
ggplot(df_long, aes(x = city, y = delay, fill = x_1)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Delay by City", x = "City", y = "Delay") +
  theme_minimal()

summary(df1)
##       x                 x_1             los_angeles       phoenix      
##  Length:5           Length:5           Min.   : 62.0   Min.   :  12.0  
##  Class :character   Class :character   1st Qu.:103.2   1st Qu.: 168.8  
##  Mode  :character   Mode  :character   Median :307.0   Median : 318.0  
##                                        Mean   :342.5   Mean   :1372.0  
##                                        3rd Qu.:546.2   3rd Qu.:1521.2  
##                                        Max.   :694.0   Max.   :4840.0  
##                                        NA's   :1       NA's   :1       
##    san_diego      san_francisco      seattle    
##  Min.   : 20.00   Min.   :102.0   Min.   :  61  
##  1st Qu.: 53.75   1st Qu.:122.2   1st Qu.: 166  
##  Median :138.50   Median :224.5   Median : 253  
##  Mean   :170.00   Mean   :263.5   Mean   : 602  
##  3rd Qu.:254.75   3rd Qu.:365.8   3rd Qu.: 689  
##  Max.   :383.00   Max.   :503.0   Max.   :1841  
##  NA's   :1        NA's   :1       NA's   :1
path <- getwd()
write.csv(df1,file.path(path,"flightinfo.csv"),
 row.names = FALSE)
LS0tDQp0aXRsZTogIkRhdGEgQXNzaWdubWVudCAwNSINCmF1dGhvcjogIk1kLiBUYW56aWwgRWhzYW4iDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KbGlicmFyeSh0aWR5cikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoZ2dwbG90MikNCmBgYA0KDQoNCg0KSW5zZXJ0IGFueSB0ZXh0IGhlcmUuDQoNCmBgYHtyfQ0KZmxpZ2h0cyA8LSByZWFkLmNzdigiQzpcXFVzZXJzXFx0YW56aVxcT25lRHJpdmVcXERBVEFcXDYwN1xcV2VlazVcXHRhbnppbGRhdGEuY3N2ICIpDQoNCmxpYnJhcnkocmVhZHIpDQojIFJlYWQgdGhlIENTViBmaWxlIGRpcmVjdGx5IGZyb20gdGhlIFVSTA0KZmxpZ2h0cyA8LSByZWFkLmNzdigiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3RhbnppbDY0L0RhdGEtNjA3LUFzc2lnbm1lbnQtMDUvbWFpbi90YW56aWxkYXRhLmNzdiIpDQoNCiMgRGlzcGxheSB0aGUgZGF0YXNldA0KcHJpbnQoZmxpZ2h0cykNCg0KDQpgYGANCg0KYGBge3J9DQojcHJvdmlkZSB0aGUgbmFtZSBmb3IgdGhlIGNvbHMNCmNvbG5hbWVzKGZsaWdodHMpWzFdIDwtICJhaXJsaW5lIg0KY29sbmFtZXMoZmxpZ2h0cylbMl0gPC0gInN0YXR1cyINCmRmIDwtZGF0YS5mcmFtZShmbGlnaHRzKQ0KZGYNCmBgYA0KDQoNCmBgYHtyfQ0KI1JlbW92ZSB0aGUgbmEgcm93cw0KZGYgPC0gZHJvcF9uYShkZikNCmRmDQpgYGANCg0KYGBge3J9DQojIEZpbGwgdGhlIGFpcmxpbmVzDQpkZltkZj09IiJdPC1OQQ0KZGY8LSBmaWxsKGRmLCBhaXJsaW5lKQ0KZGYNCmBgYA0KDQpgYGB7cn0NCmRmMTwtIGdhdGhlcihkZiwga2V5ID0gJ2NpdHknLCB2YWx1ZSA9ICdmcmVxdWVuY3knLCAzOjcpDQpkZjENCg0KYGBgDQpgYGB7cn0NCmRmMSA8LSBzcHJlYWQoZGYxLCBzdGF0dXMsIGZyZXF1ZW5jeSkNCmRmMQ0KYGBgDQpgYGB7cn0NCiNEcm9wICIuIiBmcm9tIHRoZSBjaXR5IG5hbWUNCmRmMSRjaXR5IDwtIHN0cl9yZXBsYWNlX2FsbChkZjEkY2l0eSwgIlxcLiIsICIgIikNCmRmMQ0KYGBgDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KGdncGxvdDIpDQoNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KGdncGxvdDIpDQoNCiMgRW5zdXJlIHRoZSAnZGVsYXllZCcgY29sdW1uIGlzIG51bWVyaWMNCmRmMSRkZWxheWVkIDwtIGFzLm51bWVyaWMoZGYxJGRlbGF5ZWQpDQoNCiMgU3VtbWFyaXplIHRoZSB0b3RhbCBudW1iZXIgb2YgZGVsYXllZCBmbGlnaHRzIGJ5IGFpcmxpbmUNCmRlbGF5ZWRfc3VtbWFyeSA8LSBkZjEgJT4lDQogIGdyb3VwX2J5KGFpcmxpbmUpICU+JQ0KICBzdW1tYXJpc2UodG90YWxfZGVsYXllZCA9IHN1bShkZWxheWVkLCBuYS5ybSA9IFRSVUUpKQ0KDQojIFByaW50IHRoZSBzdW1tYXJ5DQpwcmludChkZWxheWVkX3N1bW1hcnkpDQoNCiMgQ3JlYXRlIGEgYmFyIHBsb3Qgb2YgdGhlIHRvdGFsIG51bWJlciBvZiBkZWxheWVkIGZsaWdodHMgYnkgYWlybGluZQ0KZ2dwbG90KGRlbGF5ZWRfc3VtbWFyeSwgYWVzKHggPSBhaXJsaW5lLCB5ID0gdG90YWxfZGVsYXllZCwgZmlsbCA9IGFpcmxpbmUpKSArDQogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiKSArDQogIGxhYnModGl0bGUgPSAiVG90YWwgTnVtYmVyIG9mIERlbGF5ZWQgRmxpZ2h0cyBieSBBaXJsaW5lIiwgeCA9ICJBaXJsaW5lIiwgeSA9ICJUb3RhbCBEZWxheWVkIEZsaWdodHMiKSArDQogIHRoZW1lX21pbmltYWwoKQ0KDQpgYGANCg0KYGBge3J9DQpjb2xuYW1lcyhkZjEpWzRdIDwtICJvbl90aW1lIg0KDQpkZjEgPC1kYXRhLmZyYW1lKGRmMSkNCmRmMQ0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCg0KIyBFbnN1cmUgdGhlICdvbl90aW1lJyBjb2x1bW4gaXMgbnVtZXJpYw0KZGYxJG9uX3RpbWUgPC0gYXMubnVtZXJpYyhnc3ViKCJfIiwgIiIsIGRmMSRvbl90aW1lKSkNCg0KIyBTdW1tYXJpemUgdGhlIHRvdGFsIG51bWJlciBvZiBvbi10aW1lIGZsaWdodHMgYnkgYWlybGluZQ0Kb25fdGltZV9zdW1tYXJ5IDwtIGRmMSAlPiUNCiAgZ3JvdXBfYnkoYWlybGluZSkgJT4lDQogIHN1bW1hcmlzZSh0b3RhbF9vbl90aW1lID0gc3VtKG9uX3RpbWUsIG5hLnJtID0gVFJVRSkpDQoNCiMgUHJpbnQgdGhlIHN1bW1hcnkNCnByaW50KG9uX3RpbWVfc3VtbWFyeSkNCg0KIyBDcmVhdGUgYSBiYXIgcGxvdCBvZiB0aGUgdG90YWwgbnVtYmVyIG9mIG9uLXRpbWUgZmxpZ2h0cyBieSBhaXJsaW5lDQpnZ3Bsb3Qob25fdGltZV9zdW1tYXJ5LCBhZXMoeCA9IGFpcmxpbmUsIHkgPSB0b3RhbF9vbl90aW1lLCBmaWxsID0gYWlybGluZSkpICsNCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsNCiAgbGFicyh0aXRsZSA9ICJUb3RhbCBOdW1iZXIgb2YgT24tVGltZSBGbGlnaHRzIGJ5IEFpcmxpbmUiLCB4ID0gIkFpcmxpbmUiLCB5ID0gIlRvdGFsIE9uLVRpbWUgRmxpZ2h0cyIpICsNCiAgdGhlbWVfbWluaW1hbCgpDQoNCg0KDQpgYGANCmBgYHtyfQ0Kc3VtbWFyeShkZjEpDQpgYGANCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeSh0aWR5cikNCg0KIyBTYW1wbGUgZGF0YSBmcmFtZQ0KZGYxIDwtIGRhdGEuZnJhbWUoDQogIHggPSBjKCJBTVdFU1QiLCAiZGVsYXllZCIsICJBTEFTS0EiLCBOQSwgImRlbGF5ZWQiKSwNCiAgeF8xID0gYygib24gdGltZSIsICJkZWxheWVkIiwgIm9uIHRpbWUiLCBOQSwgImRlbGF5ZWQiKSwNCiAgbG9zX2FuZ2VsZXMgPSBjKDY5NCwgMTE3LCA0OTcsIE5BLCA2MiksDQogIHBob2VuaXggPSBjKDQ4NDAsIDQxNSwgMjIxLCBOQSwgMTIpLA0KICBzYW5fZGllZ28gPSBjKDM4MywgNjUsIDIxMiwgTkEsIDIwKSwNCiAgc2FuX2ZyYW5jaXNjbyA9IGMoMzIwLCAxMjksIDUwMywgTkEsIDEwMiksDQogIHNlYXR0bGUgPSBjKDIwMSwgNjEsIDE4NDEsIE5BLCAzMDUpDQopDQoNCiMgUmVzaGFwZSB0aGUgZGF0YSBmcmFtZSB0byBsb25nIGZvcm1hdA0KZGZfbG9uZyA8LSBkZjEgJT4lDQogIHBpdm90X2xvbmdlcihjb2xzID0gYyhsb3NfYW5nZWxlcywgc2FuX2RpZWdvLCBzYW5fZnJhbmNpc2NvKSwgDQogICAgICAgICAgICAgICBuYW1lc190byA9ICJjaXR5IiwgDQogICAgICAgICAgICAgICB2YWx1ZXNfdG8gPSAiZGVsYXkiKQ0KDQojIEZpbHRlciBvdXQgcm93cyB3aXRoIE5BIHZhbHVlcyBpbiAneF8xJyBvciAnZGVsYXknDQpkZl9sb25nIDwtIGRmX2xvbmcgJT4lIA0KICBmaWx0ZXIoIWlzLm5hKHhfMSkgJiAhaXMubmEoZGVsYXkpKQ0KDQojIENyZWF0ZSB0aGUgZ2dwbG90DQpnZ3Bsb3QoZGZfbG9uZywgYWVzKHggPSBjaXR5LCB5ID0gZGVsYXksIGZpbGwgPSB4XzEpKSArDQogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIpICsNCiAgbGFicyh0aXRsZSA9ICJEZWxheSBieSBDaXR5IiwgeCA9ICJDaXR5IiwgeSA9ICJEZWxheSIpICsNCiAgdGhlbWVfbWluaW1hbCgpDQoNCmBgYA0KDQoNCmBgYHtyfQ0Kc3VtbWFyeShkZjEpDQpgYGANCg0KYGBge3J9DQoNCnBhdGggPC0gZ2V0d2QoKQ0Kd3JpdGUuY3N2KGRmMSxmaWxlLnBhdGgocGF0aCwiZmxpZ2h0aW5mby5jc3YiKSwNCiByb3cubmFtZXMgPSBGQUxTRSkNCmBgYA0KDQo=