R Markdown

this:

## Read the Original data from GitHub link
# Location of the raw US airlines CSV file on GitHub.
urlfile <- "https://raw.githubusercontent.com/baruab/Week2_HW/main/USAirlines.csv"
# read.csv() downloads and parses the file; its result is already a data.frame.
datain <- read.csv(urlfile)

# Working copy used by the rest of the script. No data.frame() re-wrap is
# needed because `datain` is already a data.frame.
airline_data <- datain
#View(airline_data)


# Print a banner, then the per-column summary statistics of the full data set
print("Summarize Airline data")
## [1] "Summarize Airline data"
summary(object = airline_data)
##        X              firm          year          output       
##  Min.   : 1.00   Min.   :1.0   Min.   :1970   Min.   :0.03768  
##  1st Qu.:23.25   1st Qu.:2.0   1st Qu.:1973   1st Qu.:0.14213  
##  Median :45.50   Median :3.5   Median :1977   Median :0.30503  
##  Mean   :45.50   Mean   :3.5   Mean   :1977   Mean   :0.54499  
##  3rd Qu.:67.75   3rd Qu.:5.0   3rd Qu.:1981   3rd Qu.:0.94528  
##  Max.   :90.00   Max.   :6.0   Max.   :1984   Max.   :1.93646  
##       cost             price              load       
##  Min.   :  68978   Min.   : 103795   Min.   :0.4321  
##  1st Qu.: 292046   1st Qu.: 129848   1st Qu.:0.5288  
##  Median : 637001   Median : 357434   Median :0.5661  
##  Mean   :1122524   Mean   : 471683   Mean   :0.5605  
##  3rd Qu.:1345968   3rd Qu.: 849840   3rd Qu.:0.5947  
##  Max.   :4748320   Max.   :1015610   Max.   :0.6763
# Averages of the cost and load columns over every row of the full data set
print(paste("Mean Cost is", format(mean(airline_data[["cost"]]))))
## [1] "Mean Cost is 1122524"
print(paste("Mean Load is", format(mean(airline_data[["load"]]))))
## [1] "Mean Load is 0.5604602"
# Medians of the same two columns
print(paste("Median Cost is", format(median(airline_data[["cost"]]))))
## [1] "Median Cost is 637001"
print(paste("Median Load is", format(median(airline_data[["load"]]))))
## [1] "Median Load is 0.566085"
# Build the firm-2 subset: keep only rows whose firm is 2 and only the
# year, cost and load columns. which() drops rows where the comparison is
# NA, so the row selection matches what subset() would return.
airline2_subdata <- airline_data[
  which(airline_data[["firm"]] == 2),
  c("year", "cost", "load")
]

# Give the three retained columns descriptive display names
names(airline2_subdata) <- c("Year", "Total Cost", "Capacity Utilization")

#View(airline2_subdata)
# Firm 2 only: emit a blank separator line, a banner, then the summary
# statistics of the renamed subset columns
cat("\n", sep = "")
print("Summarize Airline Subset data")
## [1] "Summarize Airline Subset data"
summary(object = airline2_subdata)
##       Year        Total Cost      Capacity Utilization
##  Min.   :1970   Min.   : 569292   Min.   :0.4734      
##  1st Qu.:1974   1st Qu.:1101632   1st Qu.:0.5196      
##  Median :1977   Median :1709270   Median :0.5402      
##  Mean   :1977   Mean   :2127884   Mean   :0.5471      
##  3rd Qu.:1980   3rd Qu.:3347720   3rd Qu.:0.5694      
##  Max.   :1984   Max.   :4209390   Max.   :0.6287
# Averages of the two renamed measure columns in the firm-2 subset
print(paste(
  "Subdata:: Mean Total Cost  is",
  format(mean(airline2_subdata[["Total Cost"]]))
))
## [1] "Subdata:: Mean Total Cost  is 2127884"
print(paste(
  "Subdata:: Mean Capacity Util(Load)  is",
  format(mean(airline2_subdata[["Capacity Utilization"]]))
))
## [1] "Subdata:: Mean Capacity Util(Load)  is 0.5470946"
# Medians of the same two columns
print(paste(
  "Subdata:: Median Total Cost  is",
  format(median(airline2_subdata[["Total Cost"]]))
))
## [1] "Subdata:: Median Total Cost  is 1709270"
print(paste(
  "Subdata:: Median Capacity Util(Load) is",
  format(median(airline2_subdata[["Capacity Utilization"]]))
))
## [1] "Subdata:: Median Capacity Util(Load) is 0.540163"
# Explain why the subset statistics differ from the full-data statistics.
# The subset holds only the firm == 2 rows, so its mean and median describe
# that one firm rather than all firms. The trailing newline keeps later
# console output from running onto the same line outside knitr.
cat(
  "Comparison Analysis:\n",
  "The mean and median values in the two datasets differ because the second",
  "dataset is a subset of the first containing only the rows for firm 2.\n"
)
## Comparison Analysis:
##  The mean and median values in the two datasets differ because the second dataset is a subset of the first containing only the rows for firm 2.
# Replace each numeric Total Cost with a label saying whether it falls below
# the subset's mean Total Cost. The mean is computed from the column itself
# rather than hard-coded, so the threshold cannot drift from the data.
# The whole right-hand side is evaluated before the assignment, so the mean
# is taken over the original numeric values.
# NOTE: this overwrites the numeric column with character labels, so the
# statement must not be re-run on an already labelled data frame.
airline2_subdata$`Total Cost` <- ifelse(
  airline2_subdata$`Total Cost` < mean(airline2_subdata$`Total Cost`),
  "Less than mean",
  "Higher than mean"
)
airline2_subdata
##    Year       Total Cost Capacity Utilization
## 16 1970   Less than mean             0.490851
## 17 1971   Less than mean             0.473449
## 18 1972   Less than mean             0.503013
## 19 1973   Less than mean             0.512501
## 20 1974   Less than mean             0.566782
## 21 1975   Less than mean             0.558133
## 22 1976   Less than mean             0.558799
## 23 1977   Less than mean             0.572070
## 24 1978   Less than mean             0.624763
## 25 1979 Higher than mean             0.628706
## 26 1980 Higher than mean             0.589150
## 27 1981 Higher than mean             0.532612
## 28 1982 Higher than mean             0.526652
## 29 1983 Higher than mean             0.540163
## 30 1984 Higher than mean             0.528775