# Load the flight records from CSV into the data frame `flight_data`.
# The file location can be overridden through the FLIGHT_DATA_CSV environment
# variable so the analysis is not tied to a single machine; when the variable
# is unset, the original path is used unchanged.
flight_data_path <- Sys.getenv(
  "FLIGHT_DATA_CSV",
  unset = "C:/Users/TO UYEN/Downloads/flight_data.csv"
)
# Stop early with a message that names the path that was actually tried.
if (!file.exists(flight_data_path)) {
  stop("Flight data file not found: ", flight_data_path, call. = FALSE)
}
flight_data <- read.csv(flight_data_path)
# Number of observations (rows) followed by number of variables (columns).
# NOTE(review): the recorded row count, 1048575, equals Excel's worksheet limit
# of 1,048,576 rows minus one header row -- confirm the CSV was not truncated
# by an Excel export before relying on any totals.
c(nrow(flight_data), ncol(flight_data))
## [1] 1048575 26
=> Bộ dữ liệu gồm 1048575 quan sát (dòng) và 26 biến (cột).
# List the column names of the data frame, in column order.
colnames(flight_data)
## [1] "month" "day_of_month" "day_of_week"
## [4] "fl_date" "op_unique_carrier" "op_carrier_fl_num"
## [7] "origin" "origin_city_name" "origin_state_nm"
## [10] "dest" "dest_city_name" "dest_state_nm"
## [13] "crs_dep_time" "dep_time" "dep_delay"
## [16] "taxi_out" "wheels_off" "wheels_on"
## [19] "taxi_in" "crs_arr_time" "arr_time"
## [22] "arr_delay" "crs_elapsed_time" "actual_elapsed_time"
## [25] "air_time" "distance"
# Report the internal storage type of every column as a named character vector.
# vapply() pins the result to one string per column, so the return type cannot
# silently change (e.g. to a list) the way sapply()'s can on unusual input.
vapply(flight_data, typeof, character(1))
## month day_of_month day_of_week fl_date
## "integer" "integer" "integer" "character"
## op_unique_carrier op_carrier_fl_num origin origin_city_name
## "character" "integer" "character" "character"
## origin_state_nm dest dest_city_name dest_state_nm
## "character" "character" "character" "character"
## crs_dep_time dep_time dep_delay taxi_out
## "integer" "integer" "integer" "integer"
## wheels_off wheels_on taxi_in crs_arr_time
## "integer" "integer" "integer" "integer"
## arr_time arr_delay crs_elapsed_time actual_elapsed_time
## "integer" "integer" "integer" "integer"
## air_time distance
## "integer" "integer"
# Per-column overview of the data frame: quartiles, mean and NA counts for the
# integer columns; length/class/mode for the character columns.
# NOTE(review): the *_time, wheels_off and wheels_on columns span roughly
# 1-2400, which looks like hhmm clock encoding -- if so, their means and
# quartiles are not meaningful as times; confirm before interpreting them.
summary(flight_data)
## month day_of_month day_of_week fl_date
## Min. :1.000 Min. : 1.00 Min. :1.000 Length:1048575
## 1st Qu.:1.000 1st Qu.: 8.00 1st Qu.:2.000 Class :character
## Median :1.000 Median :15.00 Median :4.000 Mode :character
## Mean :1.478 Mean :15.31 Mean :3.893
## 3rd Qu.:2.000 3rd Qu.:23.00 3rd Qu.:6.000
## Max. :2.000 Max. :31.00 Max. :7.000
##
## op_unique_carrier op_carrier_fl_num origin origin_city_name
## Length:1048575 Min. : 1 Length:1048575 Length:1048575
## Class :character 1st Qu.:1088 Class :character Class :character
## Mode :character Median :2074 Mode :character Mode :character
## Mean :2355
## 3rd Qu.:3467
## Max. :8819
##
## origin_state_nm dest dest_city_name dest_state_nm
## Length:1048575 Length:1048575 Length:1048575 Length:1048575
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## crs_dep_time dep_time dep_delay taxi_out
## Min. : 1 Min. : 1 Min. : -96.00 Min. : 1.00
## 1st Qu.: 905 1st Qu.: 911 1st Qu.: -6.00 1st Qu.: 12.00
## Median :1315 Median :1323 Median : -2.00 Median : 15.00
## Mean :1319 Mean :1325 Mean : 11.68 Mean : 18.25
## 3rd Qu.:1727 3rd Qu.:1736 3rd Qu.: 8.00 3rd Qu.: 21.00
## Max. :2359 Max. :2400 Max. :3125.00 Max. :213.00
## NA's :22553 NA's :22650 NA's :23125
## wheels_off wheels_on taxi_in crs_arr_time
## Min. : 1 Min. : 1 Min. : 1.000 Min. : 1
## 1st Qu.: 929 1st Qu.:1058 1st Qu.: 4.000 1st Qu.:1115
## Median :1337 Median :1510 Median : 6.000 Median :1522
## Mean :1350 Mean :1476 Mean : 8.083 Mean :1503
## 3rd Qu.:1750 3rd Qu.:1914 3rd Qu.: 9.000 3rd Qu.:1924
## Max. :2400 Max. :2400 Max. :444.000 Max. :2359
## NA's :23125 NA's :23677 NA's :23677
## arr_time arr_delay crs_elapsed_time actual_elapsed_time
## Min. : 1 Min. :-117.000 Min. : 6.0 Min. : 16.0
## 1st Qu.:1102 1st Qu.: -17.000 1st Qu.: 95.0 1st Qu.: 89.0
## Median :1514 Median : -7.000 Median :133.0 Median :127.0
## Mean :1481 Mean : 5.584 Mean :148.8 Mean :142.5
## 3rd Qu.:1920 3rd Qu.: 9.000 3rd Qu.:180.0 3rd Qu.:175.0
## Max. :2400 Max. :3136.000 Max. :859.0 Max. :792.0
## NA's :23675 NA's :25751 NA's :1 NA's :25751
## air_time distance
## Min. : 8.0 Min. : 31.0
## 1st Qu.: 64.0 1st Qu.: 402.0
## Median :100.0 Median : 692.0
## Mean :116.2 Mean : 834.5
## 3rd Qu.:147.0 3rd Qu.:1069.0
## Max. :723.0 Max. :5095.0
## NA's :25751
# Count the rows that exactly repeat an earlier row across all columns.
length(which(duplicated(flight_data)))
## [1] 0
=> Bộ dữ liệu không có quan sát nào bị trùng lặp (số dòng trùng lặp bằng 0).