Vybrala som si súbor dát, v ktorom sú údaje o HDP krajín za roky 1975 – 2025. Skúšala som základné prvky práce s datasetom; s vecami, ktoré som sama nevedela, mi pomohol ChatGPT.
# readr is a package for reading CSV files
library("readr")
# Show the working directory that relative file paths are resolved against
getwd()
## [1] "/cloud/project"
# List the contents of the working directory to confirm the CSV file is there
list.files(path = ".")
## [1] "GDP_1975_2025_uploaded.csv" "Práca s údajmi.Rmd"
## [3] "Práca-s-údajmi.html" "Práca-s-údajmi.Rmd"
## [5] "project.Rproj"
# Read the GDP table. check.names = FALSE keeps the year headers ("1975", ...)
# exactly as they appear in the file instead of rewriting them into
# syntactic R names.
data <- read.csv(file = "GDP_1975_2025_uploaded.csv", check.names = FALSE)
# Descriptive summary of every column (quartiles, mean, number of NA's)
summary(object = data)
## Country 1975 1976 1977
## Length:205 Min. : 4 Min. : 4 Min. : 4.0
## Class :character 1st Qu.: 585 1st Qu.: 685 1st Qu.: 701.5
## Mode :character Median : 2610 Median : 2814 Median : 3139.0
## Mean : 36715 Mean : 39632 Mean : 44606.0
## 3rd Qu.: 13654 3rd Qu.: 15626 3rd Qu.: 18762.5
## Max. :1623400 Max. :1809100 Max. :2013600.0
## NA's :30 NA's :30 NA's :30
## 1978 1979 1980 1981
## Min. : 4 Min. : 4 Min. : 32 Min. : 30
## 1st Qu.: 800 1st Qu.: 870 1st Qu.: 1988 1st Qu.: 1695
## Median : 3020 Median : 3709 Median : 7327 Median : 7578
## Mean : 52869 Mean : 60831 Mean : 78015 Mean : 79502
## 3rd Qu.: 21096 3rd Qu.: 24343 3rd Qu.: 41865 3rd Qu.: 48156
## Max. :2276000 Max. :2543500 Max. :2857325 Max. :3207025
## NA's :30 NA's :30 NA's :61 NA's :60
## 1982 1983 1984 1985
## Min. : 33 Min. : 31 Min. : 34 Min. : 26
## 1st Qu.: 1548 1st Qu.: 1767 1st Qu.: 1579 1st Qu.: 1808
## Median : 8170 Median : 7898 Median : 7891 Median : 6634
## Mean : 78010 Mean : 80101 Mean : 82957 Mean : 86010
## 3rd Qu.: 49535 3rd Qu.: 48401 3rd Qu.: 46358 3rd Qu.: 45498
## Max. :3343800 Max. :3634025 Max. :4037650 Max. :4339000
## NA's :60 NA's :60 NA's :60 NA's :59
## 1986 1987 1988 1989
## Min. : 26 Min. : 30 Min. : 38 Min. : 38
## 1st Qu.: 2008 1st Qu.: 2256 1st Qu.: 2330 1st Qu.: 2336
## Median : 6746 Median : 8149 Median : 7605 Median : 8550
## Mean : 100656 Mean : 115877 Mean : 130296 Mean : 137087
## 3rd Qu.: 46584 3rd Qu.: 51422 3rd Qu.: 60043 3rd Qu.: 58424
## Max. :4579625 Max. :4855250 Max. :5236425 Max. :5641600
## NA's :58 NA's :58 NA's :58 NA's :58
## 1990 1991 1992 1993
## Min. : 37 Min. : 41 Min. : 62 Min. : 59
## 1st Qu.: 2518 1st Qu.: 2528 1st Qu.: 2015 1st Qu.: 2164
## Median : 7494 Median : 6984 Median : 7158 Median : 7349
## Mean : 148936 Mean : 154708 Mean : 150045 Mean : 152550
## 3rd Qu.: 61364 3rd Qu.: 53519 3rd Qu.: 52275 3rd Qu.: 56058
## Max. :5963125 Max. :6158125 Max. :6520325 Max. :6858550
## NA's :53 NA's :52 NA's :36 NA's :34
## 1994 1995 1996 1997
## Min. : 67 Min. : 69 Min. : 81 Min. : 80
## 1st Qu.: 2164 1st Qu.: 2365 1st Qu.: 2874 1st Qu.: 2550
## Median : 8004 Median : 8544 Median : 8794 Median : 9970
## Mean : 162124 Mean : 175698 Mean : 179601 Mean : 178008
## 3rd Qu.: 57517 3rd Qu.: 67803 3rd Qu.: 71986 3rd Qu.: 80572
## Max. :7287250 Max. :7639750 Max. :8073125 Max. :8577550
## NA's :32 NA's :27 NA's :26 NA's :25
## 1998 1999 2000 2001
## Min. : 73 Min. : 77 Min. : 15 Min. : 14
## 1st Qu.: 2585 1st Qu.: 2646 1st Qu.: 1963 1st Qu.: 1992
## Median : 10251 Median : 10502 Median : 9448 Median : 9424
## Mean : 176149 Mean : 182420 Mean : 178796 Mean : 176624
## 3rd Qu.: 75304 3rd Qu.: 75596 3rd Qu.: 61936 3rd Qu.: 68519
## Max. :9062825 Max. :9631175 Max. :10250950 Max. :10581925
## NA's :24 NA's :24 NA's :14 NA's :13
## 2002 2003 2004 2005
## Min. : 17 Min. : 20 Min. : 23 Min. : 23
## 1st Qu.: 2301 1st Qu.: 2854 1st Qu.: 3180 1st Qu.: 3400
## Median : 9972 Median : 11429 Median : 13590 Median : 15434
## Mean : 181505 Mean : 203827 Mean : 228248 Mean : 247334
## 3rd Qu.: 70295 3rd Qu.: 82408 3rd Qu.: 94230 3rd Qu.: 107327
## Max. :10929100 Max. :11456450 Max. :12217175 Max. :13039200
## NA's :12 NA's :12 NA's :11 NA's :11
## 2006 2007 2008 2009
## Min. : 24 Min. : 25 Min. : 32 Min. : 28
## 1st Qu.: 4012 1st Qu.: 4785 1st Qu.: 5704 1st Qu.: 5497
## Median : 18032 Median : 21732 Median : 26161 Median : 24834
## Mean : 267961 Mean : 302552 Mean : 332171 Mean : 314692
## 3rd Qu.: 120486 3rd Qu.: 143066 3rd Qu.: 180204 3rd Qu.: 166425
## Max. :13815600 Max. :14474250 Max. :14769850 Max. :14478050
## NA's :11 NA's :11 NA's :11 NA's :11
## 2010 2011 2012 2013
## Min. : 33 Min. : 40 Min. : 39 Min. : 39
## 1st Qu.: 6888 1st Qu.: 6834 1st Qu.: 7623 1st Qu.: 8098
## Median : 27008 Median : 29524 Median : 30939 Median : 33823
## Mean : 344314 Mean : 380419 Mean : 387838 Mean : 399313
## 3rd Qu.: 175921 3rd Qu.: 205589 3rd Qu.: 213518 3rd Qu.: 228191
## Max. :15048975 Max. :15599725 Max. :16253950 Max. :16880675
## NA's :11 NA's :10 NA's :10 NA's :10
## 2014 2015 2016 2017
## Min. : 39 Min. : 37 Min. : 41 Min. : 45
## 1st Qu.: 8652 1st Qu.: 8202 1st Qu.: 7562 1st Qu.: 8598
## Median : 36396 Median : 33241 Median : 33885 Median : 37205
## Mean : 410172 Mean : 387278 Mean : 393934 Mean : 419054
## 3rd Qu.: 225777 3rd Qu.: 190979 3rd Qu.: 194696 3rd Qu.: 213554
## Max. :17608125 Max. :18295000 Max. :18804900 Max. :19612100
## NA's :10 NA's :10 NA's :10 NA's :10
## 2018 2019 2020 2021
## Min. : 48 Min. : 54 Min. : 52 Min. : 62
## 1st Qu.: 8954 1st Qu.: 9860 1st Qu.: 9588 1st Qu.: 11142
## Median : 39568 Median : 37925 Median : 35334 Median : 37719
## Mean : 444983 Mean : 451423 Mean : 437889 Mean : 504350
## 3rd Qu.: 236196 3rd Qu.: 238412 3rd Qu.: 207481 3rd Qu.: 254613
## Max. :20656525 Max. :21539975 Max. :21354125 Max. :23681175
## NA's :10 NA's :10 NA's :9 NA's :11
## 2022 2023 2024 2025
## Min. : 61 Min. : 63 Min. : 65 Min. : 65
## 1st Qu.: 12650 1st Qu.: 13604 1st Qu.: 13500 1st Qu.: 14214
## Median : 41568 Median : 43631 Median : 47136 Median : 47829
## Mean : 525506 Mean : 548617 Mean : 575687 Mean : 599141
## 3rd Qu.: 274631 3rd Qu.: 285036 3rd Qu.: 291511 3rd Qu.: 303293
## Max. :26006900 Max. :27720725 Max. :29184900 Max. :30507217
## NA's :11 NA's :11 NA's :13 NA's :16
Zaujímalo ma, ktorých 5 krajín malo najvyššie HDP v roku 1975 v porovnaní s rokom 2025.
# Load the CSV file; year columns keep their literal names ("1975", ...)
gdp <- read.csv(file = "GDP_1975_2025_uploaded.csv", check.names = FALSE)
# Top 5 countries by GDP in 1975: sort row indices by descending GDP
# (missing values go last), keep the first five, and select the country
# name together with that year's value.
top_1975 <- gdp[
  order(-gdp[["1975"]])[seq_len(5)],
  c("Country", "1975")
]
print(top_1975)
## Country 1975
## 196 United States 1623400
## 171 Soviet Union 685972
## 88 Japan 512861
## 66 Germany 474785
## 62 France 355617
# Top 5 countries by GDP in 2025: sort row indices by descending GDP
# (missing values go last), keep the first five, and select the country
# name together with that year's value.
top_2025 <- gdp[
  order(-gdp[["2025"]])[seq_len(5)],
  c("Country", "2025")
]
print(top_2025)
## Country 2025
## 196 United States 30507217
## 37 China 19231705
## 66 Germany 4744804
## 79 India 4187017
## 88 Japan 4186431
#stiahnem si knižnice, ktoré mi pomôžu vykresliť pekné grafy
library(ggplot2)
library(reshape2)
# Keep only the USA and China rows; dropping column 1 (Country) leaves just
# the yearly GDP columns.
usa <- gdp[gdp$Country == "United States", -1]
china <- gdp[gdp$Country == "China", -1]
# The remaining column names are the years; convert them to numbers for the
# x-axis.
roky <- as.numeric(names(usa))
# Flatten each one-row data frame into a plain numeric vector
usa_hdp <- as.numeric(usa[1, ])
china_hdp <- as.numeric(china[1, ])
# Draw the USA series. ylim is computed from BOTH series: by default plot()
# scales the y-axis to the data it is given (USA only), so China's much
# smaller early values would fall below the axis and be clipped.
plot(roky, usa_hdp, type = "l", col = "blue", lwd = 2,
     ylim = range(c(usa_hdp, china_hdp), na.rm = TRUE),
     xlab = "Rok", ylab = "HDP (v miliónoch USD)",
     main = "Vývoj HDP: USA vs. Čína (1975–2025)")
# Overlay the China series on the same axes
lines(roky, china_hdp, col = "red", lwd = 2)
# Label the two lines; without a legend the colours are ambiguous
legend("topleft", legend = c("USA", "Čína"),
       col = c("blue", "red"), lwd = 2, bty = "n")
Ako posledný krok si chcem pripraviť peknú tabuľku z údajov s použitím knižníc knitr, dplyr a kableExtra.
# Načítanie potrebných knižníc
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(textshaping)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Basic descriptive statistics for the selected years (1975 and 2025):
# row count plus mean, standard deviation, minimum and maximum per year.
gdp.stats <- gdp %>%
  summarise(
    # n counts every row, including countries with a missing value
    n = n(),
    across(
      c(`1975`, `2025`),
      list(
        # na.rm = TRUE makes each statistic skip missing values
        mean = function(x) mean(x, na.rm = TRUE),
        sd = function(x) sd(x, na.rm = TRUE),
        min = function(x) min(x, na.rm = TRUE),
        max = function(x) max(x, na.rm = TRUE)
      ),
      # Produces mean_1975, sd_1975, ..., max_2025
      .names = "{.fn}_{.col}"
    )
  )
gdp.stats
## n mean_1975 sd_1975 min_1975 max_1975 mean_2025 sd_2025 min_2025 max_2025
## 1 205 36714.87 146931.1 4 1623400 599141.4 2693421 65 30507217
# Styled table of the 2025 top-five ranking, built step by step
top_2025_table <- kable(
  top_2025,
  digits = 2,
  caption = "Top 5 krajín v roku 2025"
)
top_2025_table <- kable_styling(
  top_2025_table,
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
# NOTE(review): top_2025 keeps its original row names, so column 1 of the
# rendered table is the row-name column, and that is what gets bolded here.
top_2025_table <- column_spec(top_2025_table, 1, bold = TRUE)
# Row 0 is the header row: bold text on a light grey background
row_spec(top_2025_table, 0, bold = TRUE, background = "#f2f2f2")
| | Country | 2025 |
|---|---|---|
| 196 | United States | 30507217 |
| 37 | China | 19231705 |
| 66 | Germany | 4744804 |
| 79 | India | 4187017 |
| 88 | Japan | 4186431 |