# Load the airline delay statistics. The path is relative, so it is resolved
# against the current working directory.
data <- read.csv("airline_stats.csv")
# View() opens a spreadsheet-style data viewer, which is only meaningful in an
# interactive session; skip it when the script runs non-interactively
# (e.g. via Rscript) so the rest of the analysis still executes.
if (interactive()) {
  View(data)
}
## Summarize Categorical Data
# `airline` is a character column, so summary() reports only its length,
# class and mode.
summary(data[["airline"]])
## Length Class Mode
## 33468 character character
# Frequency table of rows per airline, stored in `x` and then printed.
x <- table(data[["airline"]])
x
##
## Alaska American Delta Jet Blue Southwest United
## 3851 5725 9107 3775 5584 5426
# Numeric summaries (quartiles, mean and NA count) of the three delay columns.
summary(data[["pct_carrier_delay"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 4.145 6.357 7.041 9.140 100.000 28
summary(data[["pct_atc_delay"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 2.147 3.900 5.090 6.630 100.000 28
summary(data[["pct_weather_delay"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.3160 0.6848 0.9352 33.3333 28
## Plotting
# Count the rows per airline once and reuse the table for the printout and
# both bar charts instead of recomputing it for every call.
airline_counts <- table(data$airline)
airline_counts
##
## Alaska American Delta Jet Blue Southwest United
## 3851 5725 9107 3775 5584 5426
# Vertical bars: airlines along x, counts along y.
barplot(
  airline_counts,
  xlab = "Airline",
  ylab = "Number",
  main = "Respondents by Airline", # title typo ("Respondesnts") corrected
  col = "#AE4371"
)
# Horizontal bars: with horiz = TRUE the counts run along x and the airlines
# along y, so the axis labels are swapped relative to the vertical chart.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
barplot(
  airline_counts,
  xlab = "Number",
  ylab = "Airline",
  main = "Respondents by Airline",
  col = "indianred4",
  horiz = TRUE
)
# Weather-delay percentage per airline. The formula uses bare column names
# together with `data =` so that any labels derived from the formula read
# "airline" / "pct_weather_delay" rather than "data$airline" /
# "data$pct_weather_delay".
boxplot(pct_weather_delay ~ airline, data = data, col = "blue3")
## Histograms
# Histogram of the weather-delay percentage with the default binning.
hist(data$pct_weather_delay)
# Same histogram with a finer binning request (breaks = 40) and a fill colour.
hist(data$pct_weather_delay, breaks = 40, col = "darkslateblue")
## Scatterplots
# Draw the three scatterplots side by side in a 1 x 3 grid. par() returns the
# settings it replaces; they are restored afterwards so that later
# base-graphics plots are not squeezed into the same grid.
old_par <- par(mfrow = c(1, 3))
plot(data$pct_carrier_delay, data$pct_atc_delay, col = "darkmagenta")
plot(data$pct_atc_delay, data$pct_carrier_delay, col = "darkred")
plot(data$pct_weather_delay, data$pct_atc_delay, col = "deeppink4")
par(old_par)
##Data visualisation using ggplot
library("ggplot2")
#Scatterplot
# `data1` used to be filled by a second read.csv() of "airline_stats.csv".
# The same file is already loaded in `data`, so reuse it instead of parsing
# the CSV twice; the `data1` name is kept because later plots refer to it.
data1 <- data
# ATC delay against weather delay; point transparency also follows the
# weather-delay value.
ggplot(data = data1) +
  geom_point(
    mapping = aes(
      x = pct_weather_delay,
      y = pct_atc_delay,
      alpha = pct_weather_delay
    ),
    colour = "firebrick"
  )
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Same axes, with the point shape distinguishing the airline.
ggplot(data = data1) +
  geom_point(
    mapping = aes(x = pct_weather_delay, y = pct_atc_delay, shape = airline),
    colour = "firebrick3"
  )
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
# ATC delay against weather delay, one point colour per airline.
ggplot(data, aes(x = pct_weather_delay, y = pct_atc_delay)) +
  geom_point(aes(colour = airline))
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Weather delay along x with one row of points per airline on y.
ggplot(data, aes(x = pct_weather_delay, y = airline)) +
  geom_point(colour = "darkslateblue")
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
# ATC delay against weather delay, split into one panel per airline and laid
# out three panels per row; transparency follows the weather-delay value.
ggplot(data1, aes(x = pct_weather_delay, y = pct_atc_delay)) +
  geom_point(aes(alpha = pct_weather_delay), colour = "darkolivegreen") +
  facet_wrap(vars(airline), ncol = 3)
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Bar layer over the ATC-delay values, with the bar outline coloured by
# airline.
# NOTE(review): geom_bar() counts rows per distinct x value (see the
# `stat_count()` warning below), so on this numeric column it draws a bar per
# unique percentage; a histogram may have been intended -- confirm.
ggplot(data, aes(x = pct_atc_delay)) +
  geom_bar(aes(colour = airline))
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_count()`).
# Base plot object reused by the boxplot variants: airline on x, weather-delay
# percentage on y.
p <- ggplot(data = data, mapping = aes(x = airline, y = pct_weather_delay))
# Printing the base object on its own renders the plot before any layer has
# been added.
p
# Default boxplot per airline.
p + geom_boxplot()
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Same boxplot with the coordinates flipped.
p +
  geom_boxplot() +
  coord_flip()
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Notched boxes with a coloured outline.
p + geom_boxplot(notch = TRUE, colour = "darkolivegreen3")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
# Box widths vary per airline (varwidth = TRUE).
p +
  geom_boxplot(varwidth = TRUE)
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Fixed fill and outline colours.
p +
  geom_boxplot(colour = "darkolivegreen", fill = "darkkhaki")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Outliers drawn in their own colour and shape.
p +
  geom_boxplot(outlier.shape = 5, outlier.colour = "brown3")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Outlier markers are switched off on the boxes; the jitter layer plots the
# individual observations instead, as faint horizontally jittered points.
p +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.3, alpha = 0.09, colour = "blue4")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Outline colour mapped to the airline.
p +
  geom_boxplot(mapping = aes(colour = airline))
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Base plot object for the weather-delay distribution.
q <- ggplot(data = data, mapping = aes(x = pct_weather_delay))
# Histogram with a frequency polygon drawn over it. The bin count is stated
# explicitly on both layers: 30 is the value ggplot2 was already falling back
# to, so the plot is unchanged, but the layers are guaranteed to share the
# same bins and ggplot2 no longer emits its "Pick better value" message.
q +
  geom_histogram(bins = 30) +
  geom_freqpoly(bins = 30, colour = "blue3")
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_bin()`).