# Install tidyverse only when it is missing, so re-running the analysis does
# not reinstall the package (and does not need network access) every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Aviation accident

Źródło zestawu danych: https://www.kaggle.com/datasets/khsamaha/aviation-accident-database-synopses

# Read the accident records from the project CSV into a data frame.
aviation.accident.data <- read.csv(
  file = "/cloud/project/aviation-accident-data-2023-05-16.csv"
)
# Keep a second name for the same data; both names are used further down.
aviation_accident <- aviation.accident.data

Dataset schema

##        date            type registration      operator fatalities
## 1 date unk.  Antonov An-12B       T-1206 Indonesian AF           
## 2 date unk.  Antonov An-12B       T-1204 Indonesian AF           
## 3 date unk.  Antonov An-12B       T-1201 Indonesian AF           
## 4 date unk. Antonov An-12BK                  Soviet AF           
## 5 date unk. Antonov An-12BP   CCCP-11815     Soviet AF          0
##              location         country cat    year
## 1                     Unknown country  U1 unknown
## 2                     Unknown country  U1 unknown
## 3                     Unknown country  U1 unknown
## 4 Tiksi Airport (IKS)          Russia  A1 unknown
## 5 Massawa Airport ...         Eritrea  A1 unknown
## [1] "date"         "type"         "registration" "operator"     "fatalities"  
## [6] "location"     "country"      "cat"          "year"
## Rows: 23,967
## Columns: 9
## $ date         <chr> "date unk.", "date unk.", "date unk.", "date unk.", "date…
## $ type         <chr> "Antonov An-12B", "Antonov An-12B", "Antonov An-12B", "An…
## $ registration <chr> "T-1206", "T-1204", "T-1201", "", "CCCP-11815", "CCCP-121…
## $ operator     <chr> "Indonesian AF", "Indonesian AF", "Indonesian AF", "Sovie…
## $ fatalities   <chr> "", "", "", "", "0", "", "", "0", "0", "0", "0", "0", "",…
## $ location     <chr> "", "", "", "Tiksi Airport (IKS)", "Massawa Airport ...",…
## $ country      <chr> "Unknown country", "Unknown country", "Unknown country", …
## $ cat          <chr> "U1", "U1", "U1", "A1", "A1", "U1", "A1", "A2", "A1", "O1…
## $ year         <chr> "unknown", "unknown", "unknown", "unknown", "unknown", "u…

Aviation accident by year

# Count accidents per year for years after 1945, most frequent first.
#
# `year` is a character column that also holds "unknown" for undated records,
# so it is converted to integer before the comparison. Values that are not
# numeric become NA (the coercion warning is silenced on purpose) and filter()
# drops them. The previous `year > 1945` compared the values as text.
#
# An earlier, unfiltered version of this table was overwritten immediately by
# this one and has been removed.
aviation_accident_by_year <- aviation.accident.data %>%
  filter(suppressWarnings(as.integer(year)) > 1945) %>%
  group_by(year) %>%
  count(year) %>%
  mutate(number = n) %>%
  arrange(desc(number))

Graficzne przedstawienie danych

# Scatter plot of yearly accident counts; point colour also encodes the count.
# The x-axis labels and all axis ticks are hidden.
ggplot(
  data = aviation_accident_by_year,
  mapping = aes(x = year, y = number, color = number)
) +
  geom_point() +
  labs(title = "Number of aviation accidents by year") +
  theme(axis.text.x = element_blank(), axis.ticks = element_blank())

Aviation accident by plane(type)

# Accident counts per aircraft type over the whole dataset, most frequent first.
aviation_accident_by_plane <- aviation.accident.data %>%
  group_by(type) %>%
  count(type) %>%
  mutate(number = n) %>%
  arrange(desc(number))

# Accident counts per type, year and country for years after 1945.
#
# `year` is a character column that also holds "unknown" for undated records.
# It is converted to integer before the comparison; non-numeric values become
# NA (the coercion warning is silenced on purpose) and filter() drops them.
# The previous `year > 1945` compared the values as text, which did not
# reliably exclude "unknown".
aviation_accident_by_plane_and_year <- aviation.accident.data %>%
  filter(suppressWarnings(as.integer(year)) > 1945) %>%
  group_by(type, year, country) %>%
  count(type, year, country) %>%
  mutate(number = n) %>%
  arrange(desc(number))

# Keep the grouping columns and the count for display.
accident_By_plane <- aviation_accident_by_plane_and_year %>%
  select(type, year, country, number)
print(accident_By_plane)
## # A tibble: 16,170 × 4
## # Groups:   type, year, country [16,170]
##    type                     year  country  number
##    <chr>                    <chr> <chr>     <int>
##  1 Boeing 727               1969  Cuba         15
##  2 Cessna T-47A Citation II 1993  USA          13
##  3 Douglas C-47A (DC-3)     1946  USA          11
##  4 Douglas Dakota IV (DC-3) 1946  Myanmar      11
##  5 Avro 685 York C.1        1947  U.K.         10
##  6 U-1A Otter (DHC-3)       1972  Cambodia     10
##  7 Antonov An-2R            1973  Russia        9
##  8 Boeing 727               1972  USA           9
##  9 Douglas C-47A (DC-3)     1951  USA           9
## 10 Lisunov Li-2             1951  Russia        9
## # … with 16,160 more rows

Plot for more than 9 accidents

# Keep the type/year/country combinations with more than 9 accidents.
accident_By_plane_more_then_9 <- filter(accident_By_plane, number > 9)

# Scatter plot of those combinations; colour and shape both identify the type.
# The legend sits below the plot on a single row with enlarged keys.
ggplot(
  accident_By_plane_more_then_9,
  aes(x = year, y = number, color = type, shape = type)
) +
  geom_point() +
  theme(legend.position = "bottom") +
  guides(colour = guide_legend(nrow = 1, override.aes = list(size = 4)))

Aviation accident by plane(type) Antonov

Print the head - top 10 aircraft types

# Rows whose aircraft type contains the text "Ant".
aviation_plain_type_Antonov <- aviation_accident[
  grepl("Ant", aviation_accident$type),
]

# Number of accidents per matching type, most frequent first.
aviation_Antonov_group <- aviation_plain_type_Antonov %>%
  group_by(type) %>%
  count(type) %>%
  arrange(desc(n))

# Show the first two columns of the first ten rows.
aviation_Antonov_group[, 1:2] %>% head(10)
## # A tibble: 10 × 2
## # Groups:   type [10]
##    type                n
##    <chr>           <int>
##  1 Antonov An-2R     391
##  2 Antonov An-2      185
##  3 Antonov An-26     137
##  4 Antonov An-2T     116
##  5 Antonov An-24RV    70
##  6 Antonov An-2TP     68
##  7 Antonov An-12BP    67
##  8 Antonov An-12B     65
##  9 Antonov An-12      64
## 10 Antonov An-24B     62

Plot for plane type Antonov

# Select the ten most frequent types (rows tied with the tenth are kept)
# instead of relying on a count threshold copied from a printed table, which
# would silently select a different number of rows once the data changes.
# The table is grouped by type, so it is ungrouped first; otherwise
# slice_max() would pick rows within each single-type group.
aviation_Antonov_group_sel_10_top <- aviation_Antonov_group %>%
  ungroup() %>%
  slice_max(n, n = 10)

# Bar chart of accident counts for the selected types. Bars are identified by
# fill colour through the legend below the plot; the x-axis labels and all
# axis ticks are hidden.
ggplot(aviation_Antonov_group_sel_10_top, aes(x = type, y = n)) +
  geom_col(aes(fill = type)) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )

Aviation accident by plane Antonov, location Poland

# Rows whose aircraft type contains the text "An".
# NOTE(review): this pattern is broader than "Antonov" and would also match
# other type names containing "An" - confirm that this is intended.
aviation_plain_type_An_1 <- aviation.accident.data[
  grepl("An", aviation.accident.data$type),
]

# Of those rows, keep the accidents whose country is Poland, grouped by type.
aviation_plain_type_An2_country <- aviation_plain_type_An_1 %>%
  filter(country == "Poland") %>%
  group_by(type)
print(aviation_plain_type_An2_country)
## # A tibble: 65 × 9
## # Groups:   type [11]
##    date        type          regis…¹ opera…² fatal…³ locat…⁴ country cat   year 
##    <chr>       <chr>         <chr>   <chr>   <chr>   <chr>   <chr>   <chr> <chr>
##  1 date unk.   Antonov An-2R "-"     PZL-Mi… "0"     "Miele… Poland  O1    unkn…
##  2 25-MAR-1961 Antonov An-2  "3"     Polish… ""      ""      Poland  A1    1961 
##  3 22-AUG-1967 Antonov An-2M "635"   Polish… ""      "Chyno… Poland  A1    1967 
##  4 09-OCT-1967 Antonov An-2T "58"    Polish… ""      ""      Poland  A1    1967 
##  5 28-MAY-1968 Antonov An-2M "636"   Polish… ""      "near … Poland  A1    1968 
##  6 24-JAN-1969 Antonov An-2… "SP-LT… LOT     "0"     "near … Poland  A1    1969 
##  7 02-APR-1969 Antonov An-2… "SP-LT… LOT     "53"    "near … Poland  A1    1969 
##  8 ??-???-1969 Antonov An-2R "SP-CN… APRL    ""      ""      Poland  A1    1969 
##  9 26-AUG-1970 Antonov An-24 ""      LOT     "0"     "Katow… Poland  H2    1970 
## 10 28-FEB-1973 Antonov An-2… "012"   Polish… "18"    "near … Poland  A1    1973 
## # … with 55 more rows, and abbreviated variable names ¹​registration, ²​operator,
## #   ³​fatalities, ⁴​location