# Install tidyverse only when it is not already available, so re-running the
# script does not trigger a fresh download and install every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Install lubridate only if it is missing. The tidyverse 2.0.0 startup message
# in this document already lists lubridate among the attached core packages,
# so this is normally a no-op.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(lubridate)
Dataset: kaggle datasets download -d grumpylew123/dataset
# Read the aviation records from the project CSV; column types are guessed
# by readr.
AviationData <- read_csv(
  file = "/cloud/project/AviationData.csv"
)
## Rows: 88889 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (25): Event.Id, Investigation.Type, Accident.Number, Location, Country,...
## dbl (5): Number.of.Engines, Total.Fatal.Injuries, Total.Serious.Injuries, ...
## date (1): Event.Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary (type/length for character columns, quantiles and NA
# counts for numeric and date columns).
summary(object = AviationData)
## Event.Id Investigation.Type Accident.Number Event.Date
## Length:88889 Length:88889 Length:88889 Min. :1948-10-24
## Class :character Class :character Class :character 1st Qu.:1989-01-15
## Mode :character Mode :character Mode :character Median :1998-07-18
## Mean :1999-09-17
## 3rd Qu.:2009-07-01
## Max. :2022-12-29
##
## Location Country Latitude Longitude
## Length:88889 Length:88889 Length:88889 Length:88889
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Airport.Code Airport.Name Injury.Severity Aircraft.damage
## Length:88889 Length:88889 Length:88889 Length:88889
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Aircraft.Category Registration.Number Make Model
## Length:88889 Length:88889 Length:88889 Length:88889
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Amateur.Built Number.of.Engines Engine.Type FAR.Description
## Length:88889 Min. :0.000 Length:88889 Length:88889
## Class :character 1st Qu.:1.000 Class :character Class :character
## Mode :character Median :1.000 Mode :character Mode :character
## Mean :1.147
## 3rd Qu.:1.000
## Max. :8.000
## NA's :6084
## Schedule Purpose.of.flight Air.carrier Total.Fatal.Injuries
## Length:88889 Length:88889 Length:88889 Min. : 0.000
## Class :character Class :character Class :character 1st Qu.: 0.000
## Mode :character Mode :character Mode :character Median : 0.000
## Mean : 0.648
## 3rd Qu.: 0.000
## Max. :349.000
## NA's :11401
## Total.Serious.Injuries Total.Minor.Injuries Total.Uninjured
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.00 Median : 0.000 Median : 1.000
## Mean : 0.28 Mean : 0.357 Mean : 5.325
## 3rd Qu.: 0.00 3rd Qu.: 0.000 3rd Qu.: 2.000
## Max. :161.00 Max. :380.000 Max. :699.000
## NA's :12510 NA's :11933 NA's :5912
## Weather.Condition Broad.phase.of.flight Report.Status Publication.Date
## Length:88889 Length:88889 Length:88889 Length:88889
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
# Compact column-wise overview: one line per column with its type and the
# first few values.
glimpse(x = AviationData)
## Rows: 88,889
## Columns: 31
## $ Event.Id <chr> "20001218X45444", "20001218X45447", "20061025X0…
## $ Investigation.Type <chr> "Accident", "Accident", "Accident", "Accident",…
## $ Accident.Number <chr> "SEA87LA080", "LAX94LA336", "NYC07LA005", "LAX9…
## $ Event.Date <date> 1948-10-24, 1962-07-19, 1974-08-30, 1977-06-19…
## $ Location <chr> "MOOSE CREEK, ID", "BRIDGEPORT, CA", "Saltville…
## $ Country <chr> "United States", "United States", "United State…
## $ Latitude <chr> NA, NA, "36.922223", NA, NA, "42.445277", NA, N…
## $ Longitude <chr> NA, NA, "-81.878056", NA, NA, "-70.758333", NA,…
## $ Airport.Code <chr> NA, NA, NA, NA, NA, NA, NA, NA, "N58", "JAX", N…
## $ Airport.Name <chr> NA, NA, NA, NA, NA, "N/A", NA, "BLACKBURN AG ST…
## $ Injury.Severity <chr> "Fatal(2)", "Fatal(4)", "Fatal(3)", "Fatal(2)",…
## $ Aircraft.damage <chr> "Destroyed", "Destroyed", "Destroyed", "Destroy…
## $ Aircraft.Category <chr> NA, NA, NA, NA, NA, "Airplane", NA, "Airplane",…
## $ Registration.Number <chr> "NC6404", "N5069P", "N5142R", "N1168J", "N15NY"…
## $ Make <chr> "Stinson", "Piper", "Cessna", "Rockwell", "Cess…
## $ Model <chr> "108-3", "PA24-180", "172M", "112", "501", "DC9…
## $ Amateur.Built <chr> "No", "No", "No", "No", "No", "No", "No", "No",…
## $ Number.of.Engines <dbl> 1, 1, 1, 1, NA, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1…
## $ Engine.Type <chr> "Reciprocating", "Reciprocating", "Reciprocatin…
## $ FAR.Description <chr> NA, NA, NA, NA, NA, "Part 129: Foreign", NA, "P…
## $ Schedule <chr> NA, NA, NA, NA, NA, "SCHD", NA, NA, NA, NA, NA,…
## $ Purpose.of.flight <chr> "Personal", "Personal", "Personal", "Personal",…
## $ Air.carrier <chr> NA, NA, NA, NA, NA, "Air Canada", NA, NA, NA, N…
## $ Total.Fatal.Injuries <dbl> 2, 4, 3, 2, 1, NA, 4, 0, 0, 0, 0, 0, 0, 1, 1, 2…
## $ Total.Serious.Injuries <dbl> 0, 0, NA, 0, 2, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Total.Minor.Injuries <dbl> 0, 0, NA, 0, NA, 1, 0, 0, 0, 3, 0, 0, 1, 0, 0, …
## $ Total.Uninjured <dbl> 0, 0, NA, 0, 0, 44, 0, 2, 2, 0, 1, 1, 0, 0, 0, …
## $ Weather.Condition <chr> "UNK", "UNK", "IMC", "IMC", "VMC", "VMC", "IMC"…
## $ Broad.phase.of.flight <chr> "Cruise", "Unknown", "Cruise", "Cruise", "Appro…
## $ Report.Status <chr> "Probable Cause", "Probable Cause", "Probable C…
## $ Publication.Date <chr> NA, "19-09-1996", "26-02-2007", "12-09-2000", "…
# Show the first six rows of the data set.
head(x = AviationData, n = 6)
## # A tibble: 6 × 31
## Event.Id Inves…¹ Accid…² Event.Date Locat…³ Country Latit…⁴ Longi…⁵ Airpo…⁶
## <chr> <chr> <chr> <date> <chr> <chr> <chr> <chr> <chr>
## 1 20001218X4… Accide… SEA87L… 1948-10-24 MOOSE … United… <NA> <NA> <NA>
## 2 20001218X4… Accide… LAX94L… 1962-07-19 BRIDGE… United… <NA> <NA> <NA>
## 3 20061025X0… Accide… NYC07L… 1974-08-30 Saltvi… United… 36.922… -81.87… <NA>
## 4 20001218X4… Accide… LAX96L… 1977-06-19 EUREKA… United… <NA> <NA> <NA>
## 5 20041105X0… Accide… CHI79F… 1979-08-02 Canton… United… <NA> <NA> <NA>
## 6 20170710X5… Accide… NYC79A… 1979-09-17 BOSTON… United… 42.445… -70.75… <NA>
## # … with 22 more variables: Airport.Name <chr>, Injury.Severity <chr>,
## # Aircraft.damage <chr>, Aircraft.Category <chr>, Registration.Number <chr>,
## # Make <chr>, Model <chr>, Amateur.Built <chr>, Number.of.Engines <dbl>,
## # Engine.Type <chr>, FAR.Description <chr>, Schedule <chr>,
## # Purpose.of.flight <chr>, Air.carrier <chr>, Total.Fatal.Injuries <dbl>,
## # Total.Serious.Injuries <dbl>, Total.Minor.Injuries <dbl>,
## # Total.Uninjured <dbl>, Weather.Condition <chr>, …
Jakie typy samolotów najczęściej uczestniczą w wypadkach lotniczych? W jakich warunkach pogodowych? Podczas jakich manewrów?
## # A tibble: 16 × 2
## # Groups: Aircraft.Category [16]
## Aircraft.Category n
## <chr> <int>
## 1 <NA> 56602
## 2 Airplane 27617
## 3 Helicopter 3440
## 4 Glider 508
## 5 Balloon 231
## 6 Gyrocraft 173
## 7 Weight-Shift 161
## 8 Powered Parachute 91
## 9 Ultralight 30
## 10 Unknown 14
## 11 WSFT 9
## 12 Powered-Lift 5
## 13 Blimp 4
## 14 UNK 2
## 15 Rocket 1
## 16 ULTR 1
Wykres kolumnowy typów statków powietrznych, które brały udział w wypadkach lotniczych, oraz ich liczby.
# Column chart of the counts in column n, one bar per Aircraft.Category.
# The x-axis title, labels and ticks (and the y-axis ticks) are hidden, so the
# categories are identified through the fill colour.
ggplot(data = AviationData_Category) +
  geom_col(
    mapping = aes(
      x = Aircraft.Category,
      y = n,
      fill = Aircraft.Category
    )
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
## # A tibble: 47 × 2
## # Groups: year [47]
## year n
## <chr> <int>
## 1 1982 3593
## 2 1983 3556
## 3 1984 3457
## 4 1985 3096
## 5 1986 2880
## 6 1987 2828
## 7 1988 2730
## 8 1989 2544
## 9 1990 2518
## 10 1991 2462
## # … with 37 more rows
## # A tibble: 5 × 2
## # Groups: Aircraft.damage [5]
## Aircraft.damage n
## <chr> <int>
## 1 Substantial 64148
## 2 Destroyed 18623
## 3 <NA> 3194
## 4 Minor 2805
## 5 Unknown 119
# Column chart of the counts in column n, one bar per Aircraft.damage level,
# with each bar filled by its damage level.
ggplot(data = AviationData_Damage) +
  geom_col(
    mapping = aes(
      x = Aircraft.damage,
      y = n,
      fill = Aircraft.damage
    )
  )
Dane prezentują wypadki podczas poszczególnych faz lotu.
# Count records per broad phase of flight, most frequent first.
# count() groups by the given column itself, so the former select() (which
# also pulled in the unused Weather.Condition column) and group_by() steps
# were redundant. Dropping group_by() also yields an ungrouped tibble instead
# of one still grouped by Broad.phase.of.flight.
AviationData_Phase <- AviationData %>%
  count(Broad.phase.of.flight, sort = TRUE)

# Column chart of the counts per flight phase. The x-axis title, labels and
# ticks are hidden, so the phases are identified through the fill colour.
ggplot(data = AviationData_Phase) +
  geom_col(
    mapping = aes(
      x = Broad.phase.of.flight,
      y = n,
      fill = Broad.phase.of.flight
    )
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )