# Using the airline_stats.csv data

# Load the airline statistics from a CSV file in the working directory.
data <- read.csv("airline_stats.csv")

# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable with Rscript or when it is knitted.
if (interactive()) {
  View(data)
}

##Summarize Categorical Data

# `airline` is held as a character column, so summary() only reports its
# length, class and mode.
summary(data[["airline"]])
##    Length     Class      Mode 
##     33468 character character

# Count the rows recorded for each airline and keep the table in `x`.
x <- table(data[["airline"]])
print(x)
## 
##    Alaska  American     Delta  Jet Blue Southwest    United 
##      3851      5725      9107      3775      5584      5426

# Quartiles, mean and NA count for each of the three delay-percentage columns.
summary(data[["pct_carrier_delay"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   4.145   6.357   7.041   9.140 100.000      28
summary(data[["pct_atc_delay"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   2.147   3.900   5.090   6.630 100.000      28
summary(data[["pct_weather_delay"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  0.3160  0.6848  0.9352 33.3333      28

##Plotting

# Number of rows recorded for each airline.
table(data$airline)
## 
##    Alaska  American     Delta  Jet Blue Southwest    United 
##      3851      5725      9107      3775      5584      5426

# Vertical bar chart of the per-airline counts.
# The title previously read "Respondesnts"; the spelling now matches the
# title used by the horizontal version of this chart.
barplot(
  table(data$airline),
  xlab = "Airline",
  ylab = "Number",
  main = "Respondents by Airline",
  col = "#AE4371"
)

# Same per-airline counts drawn as horizontal bars.
# With horiz = TRUE the counts run along the x-axis and the airlines along
# the y-axis, so the axis labels are swapped relative to the vertical chart.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
barplot(
  table(data$airline),
  xlab = "Number",
  ylab = "Airline",
  main = "Respondents by Airline",
  col = "indianred4",
  horiz = TRUE
)

# Box plot of the weather-delay percentage for each airline.
# Supplying the data frame through `data =` lets the formula use bare column
# names, so the axes are labelled "airline" and "pct_weather_delay" instead
# of the raw `data$...` expressions.
boxplot(pct_weather_delay ~ airline, data = data, col = "blue3")

## Histograms

# Weather-delay histogram using hist()'s default binning.
hist(x = data$pct_weather_delay)

# Same variable again, this time requesting roughly 40 bins and a fill colour.
hist(
  x = data$pct_weather_delay,
  col = "darkslateblue",
  breaks = 40
)

##Scatterplots

# Draw three scatterplots of the delay percentages side by side (1 x 3 grid).
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not squeezed into the same grid.
old_par <- par(mfrow = c(1, 3))
plot(data$pct_carrier_delay, data$pct_atc_delay, col = "darkmagenta")
plot(data$pct_atc_delay, data$pct_carrier_delay, col = "darkred")
plot(data$pct_weather_delay, data$pct_atc_delay, col = "deeppink4")
par(old_par)

##Data visualisation using ggplot

library("ggplot2")

#Scatterplot

# `data1` used to be a second read of the CSV that is already loaded into
# `data`; reuse the in-memory copy so the file is parsed only once and both
# names are guaranteed to hold the same rows.
data1 <- data

# Weather delay against ATC delay; point transparency also follows the
# weather-delay value.
ggplot(data = data1) +
  geom_point(
    mapping = aes(
      x = pct_weather_delay,
      y = pct_atc_delay,
      alpha = pct_weather_delay
    ),
    colour = "firebrick"
  )
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Weather delay against ATC delay, with the airline shown by point shape.
ggplot(data1, aes(x = pct_weather_delay, y = pct_atc_delay)) +
  geom_point(aes(shape = airline), colour = "firebrick3")
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Same axes, with the airline shown by point colour.
ggplot(
  data,
  aes(x = pct_weather_delay, y = pct_atc_delay, colour = airline)
) +
  geom_point()
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Weather delay on the x-axis with one row of points per airline on the y-axis.
ggplot(data, aes(x = pct_weather_delay, y = airline)) +
  geom_point(colour = "darkslateblue")
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# One panel per airline, laid out three panels per row; transparency follows
# the weather-delay value.
ggplot(data1, aes(x = pct_weather_delay, y = pct_atc_delay)) +
  geom_point(aes(alpha = pct_weather_delay), colour = "darkolivegreen") +
  facet_wrap(~airline, ncol = 3)
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Bar chart that counts the rows at each pct_atc_delay value (stat_count),
# with the bar outlines coloured by airline.
# NOTE(review): pct_atc_delay is numeric, so geom_histogram() may have been
# intended here -- confirm with the author.
ggplot(data, aes(x = pct_atc_delay, colour = airline)) +
  geom_bar()
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_count()`).

# Base plot object shared by every box plot below: airline on the x-axis,
# weather-delay percentage on the y-axis. Printing it alone draws the axes
# without any layer.
p <- ggplot(data, aes(x = airline, y = pct_weather_delay))
print(p)

# Default box plot.
p + geom_boxplot()
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Same box plot with the axes flipped, so the boxes run horizontally.
p +
  geom_boxplot() +
  coord_flip()
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Notched boxes with a coloured outline.
p + geom_boxplot(colour = "darkolivegreen3", notch = TRUE)
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

# Variable-width boxes.
p + geom_boxplot(varwidth = TRUE)
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Custom outline and fill colours.
p + geom_boxplot(colour = "darkolivegreen", fill = "darkkhaki")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Custom colour and shape for the outlier points.
p + geom_boxplot(outlier.shape = 5, outlier.colour = "brown3")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Hide the box plot's own outlier markers and overlay every observation as a
# faint jittered point instead.
p +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(colour = "blue4", alpha = 0.09, width = 0.3)
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Outline colour mapped to the airline.
p + geom_boxplot(aes(colour = airline))
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Histogram of the weather-delay percentage with a frequency polygon drawn
# over it.
# The bin count is stated explicitly on both layers: 30 is the value ggplot2
# was already falling back to, so the plot is unchanged, but the
# "Pick better value with `binwidth`" message is no longer emitted and the
# two layers are guaranteed to share the same binning.
q <- ggplot(data = data, aes(x = pct_weather_delay))
q +
  geom_histogram(bins = 30) +
  geom_freqpoly(bins = 30, colour = "blue3")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_bin()`).