# Install tidyverse only when it is not already available, so that re-running
# or knitting this script does not download and reinstall it every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Źródło zestawu danych: https://www.kaggle.com/datasets/khsamaha/aviation-accident-database-synopses
# Load the aviation accident records from the project CSV file.
aviation.accident.data <- read.csv(
  file = "/cloud/project/aviation-accident-data-2023-05-16.csv"
)

# Second name bound to the same data frame; later steps use both names.
aviation_accident <- aviation.accident.data
## date type registration operator fatalities
## 1 date unk. Antonov An-12B T-1206 Indonesian AF
## 2 date unk. Antonov An-12B T-1204 Indonesian AF
## 3 date unk. Antonov An-12B T-1201 Indonesian AF
## 4 date unk. Antonov An-12BK Soviet AF
## 5 date unk. Antonov An-12BP CCCP-11815 Soviet AF 0
## location country cat year
## 1 Unknown country U1 unknown
## 2 Unknown country U1 unknown
## 3 Unknown country U1 unknown
## 4 Tiksi Airport (IKS) Russia A1 unknown
## 5 Massawa Airport ... Eritrea A1 unknown
## [1] "date" "type" "registration" "operator" "fatalities"
## [6] "location" "country" "cat" "year"
## Rows: 23,967
## Columns: 9
## $ date <chr> "date unk.", "date unk.", "date unk.", "date unk.", "date…
## $ type <chr> "Antonov An-12B", "Antonov An-12B", "Antonov An-12B", "An…
## $ registration <chr> "T-1206", "T-1204", "T-1201", "", "CCCP-11815", "CCCP-121…
## $ operator <chr> "Indonesian AF", "Indonesian AF", "Indonesian AF", "Sovie…
## $ fatalities <chr> "", "", "", "", "0", "", "", "0", "0", "0", "0", "0", "",…
## $ location <chr> "", "", "", "Tiksi Airport (IKS)", "Massawa Airport ...",…
## $ country <chr> "Unknown country", "Unknown country", "Unknown country", …
## $ cat <chr> "U1", "U1", "U1", "A1", "A1", "U1", "A1", "A2", "A1", "O1…
## $ year <chr> "unknown", "unknown", "unknown", "unknown", "unknown", "u…
# Number of accidents per year, restricted to years after 1945.
#
# `year` is a character column that also holds non-numeric markers such as
# "unknown". Comparing it directly with 1945 is a string comparison, so rows
# are first limited to four-digit years and then compared as integers.
# `count()` does the grouping itself, so no `group_by()` is needed and the
# result is an ordinary (ungrouped) table. Both `n` and `number` are kept.
aviation_accident_by_year <- aviation.accident.data %>%
  filter(grepl("^[0-9]{4}$", year)) %>%
  filter(as.integer(year) > 1945) %>%
  count(year) %>%
  mutate(number = n) %>%
  arrange(desc(number))

# Plot the yearly counts. The year is mapped as an integer so that the x axis
# is continuous and its labels stay readable instead of being hidden.
ggplot(data = aviation_accident_by_year) +
  geom_point(mapping = aes(x = as.integer(year), y = number, color = number)) +
  labs(title = "Number of aviation accidents by year", x = "year")
# Number of accidents per aircraft type, most frequent first.
# `count()` groups internally, so the result is left ungrouped.
aviation_accident_by_plane <- aviation.accident.data %>%
  count(type) %>%
  mutate(number = n) %>%
  arrange(desc(number))

# Number of accidents per aircraft type, year and country after 1945.
# `year` is character, so `year > 1945` compares strings and would keep rows
# whose year is "unknown". Rows are limited to four-digit years first and the
# comparison is then done on integers.
aviation_accident_by_plane_and_year <- aviation.accident.data %>%
  filter(grepl("^[0-9]{4}$", year)) %>%
  filter(as.integer(year) > 1945) %>%
  count(type, year, country) %>%
  mutate(number = n) %>%
  arrange(desc(number))

# Keep only the reporting columns (drops the duplicate `n` column).
accident_By_plane <- aviation_accident_by_plane_and_year %>%
  select(type, year, country, number)
print(accident_By_plane)
## # A tibble: 16,170 × 4
## # Groups: type, year, country [16,170]
## type year country number
## <chr> <chr> <chr> <int>
## 1 Boeing 727 1969 Cuba 15
## 2 Cessna T-47A Citation II 1993 USA 13
## 3 Douglas C-47A (DC-3) 1946 USA 11
## 4 Douglas Dakota IV (DC-3) 1946 Myanmar 11
## 5 Avro 685 York C.1 1947 U.K. 10
## 6 U-1A Otter (DHC-3) 1972 Cambodia 10
## 7 Antonov An-2R 1973 Russia 9
## 8 Boeing 727 1972 USA 9
## 9 Douglas C-47A (DC-3) 1951 USA 9
## 10 Lisunov Li-2 1951 Russia 9
## # … with 16,160 more rows
# Keep only the type/year/country combinations with more than 9 accidents.
accident_By_plane_more_then_9 <- filter(accident_By_plane, number > 9)

# Scatter plot of those combinations: one point per row, with aircraft type
# encoded by both colour and shape, and the legend placed under the plot.
ggplot(
  data = accident_By_plane_more_then_9,
  mapping = aes(x = year, y = number)
) +
  geom_point(mapping = aes(color = type, shape = type)) +
  theme(legend.position = "bottom") +
  guides(
    colour = guide_legend(nrow = 1, override.aes = list(size = 4))
  )
Print the head: the 10 Antonov aircraft types with the most recorded accidents.
# Rows whose aircraft type mentions "Antonov". A fixed-string match on the
# full manufacturer name is used so that unrelated types that merely contain
# the letters "Ant" cannot slip in.
is_antonov <- grepl("Antonov", aviation_accident$type, fixed = TRUE)
aviation_plain_type_Antonov <- aviation_accident[is_antonov, ]

# Accidents per Antonov type, most frequent first. `count()` groups
# internally, so the result is an ungrouped two-column table (`type`, `n`).
aviation_Antonov_group <- aviation_plain_type_Antonov %>%
  count(type, sort = TRUE)

# Show the ten most frequent types.
head(aviation_Antonov_group, 10)
## # A tibble: 10 × 2
## # Groups: type [10]
## type n
## <chr> <int>
## 1 Antonov An-2R 391
## 2 Antonov An-2 185
## 3 Antonov An-26 137
## 4 Antonov An-2T 116
## 5 Antonov An-24RV 70
## 6 Antonov An-2TP 68
## 7 Antonov An-12BP 67
## 8 Antonov An-12B 65
## 9 Antonov An-12 64
## 10 Antonov An-24B 62
# Select the ten Antonov types with the most accidents. The cut-off is
# computed from the data instead of a hard-coded count, so it stays correct
# when the dataset changes. `ungroup()` is required because `slice_max()`
# works per group and the input may be grouped by `type`. Ties at the tenth
# place are kept, as a count threshold would keep them.
aviation_Antonov_group_sel_10_top <- aviation_Antonov_group %>%
  ungroup() %>%
  slice_max(order_by = n, n = 10)

# Bar chart of the selected types. Type names are shown in the legend, so the
# x-axis labels and ticks are hidden.
ggplot(aviation_Antonov_group_sel_10_top, aes(type, n)) +
  geom_col(aes(fill = type)) +
  theme(legend.position = "bottom") +
  theme(axis.text.x = element_blank(), axis.ticks = element_blank())
# Rows whose aircraft type contains the case-sensitive substring "An".
# NOTE(review): this matches any type containing "An", not only Antonov
# aircraft -- confirm that this is the intended selection.
type_has_an <- grepl("An", aviation.accident.data$type)
aviation_plain_type_An_1 <- aviation.accident.data[type_has_an, ]

# Of those rows, keep the accidents recorded in Poland. The table is grouped
# by aircraft type before filtering, so the printed result is grouped by type.
aviation_plain_type_An2_country <- filter(
  group_by(aviation_plain_type_An_1, type),
  country == "Poland"
)
print(aviation_plain_type_An2_country)
## # A tibble: 65 × 9
## # Groups: type [11]
## date type regis…¹ opera…² fatal…³ locat…⁴ country cat year
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 date unk. Antonov An-2R "-" PZL-Mi… "0" "Miele… Poland O1 unkn…
## 2 25-MAR-1961 Antonov An-2 "3" Polish… "" "" Poland A1 1961
## 3 22-AUG-1967 Antonov An-2M "635" Polish… "" "Chyno… Poland A1 1967
## 4 09-OCT-1967 Antonov An-2T "58" Polish… "" "" Poland A1 1967
## 5 28-MAY-1968 Antonov An-2M "636" Polish… "" "near … Poland A1 1968
## 6 24-JAN-1969 Antonov An-2… "SP-LT… LOT "0" "near … Poland A1 1969
## 7 02-APR-1969 Antonov An-2… "SP-LT… LOT "53" "near … Poland A1 1969
## 8 ??-???-1969 Antonov An-2R "SP-CN… APRL "" "" Poland A1 1969
## 9 26-AUG-1970 Antonov An-24 "" LOT "0" "Katow… Poland H2 1970
## 10 28-FEB-1973 Antonov An-2… "012" Polish… "18" "near … Poland A1 1973
## # … with 55 more rows, and abbreviated variable names ¹registration, ²operator,
## # ³fatalities, ⁴location