# Global chunk defaults: echo the code, but keep package messages and
# warnings out of the rendered document.
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE,
  echo = TRUE
)
library(tidyverse)
library(kableExtra)
library(ggplot2)
# Load the travel dataset (comma-separated, "." as the decimal mark,
# first line holds the column names).
udaje <- read.csv(
  file = "Travel dataset.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

# Preview the first rows and list the column names.
head(udaje)
names(udaje)
[1] "Trip.ID" "Destination" "Start.date"
[4] "End.date" "Duration..days." "Traveler.name"
[7] "Traveler.age" "Traveler.gender" "Traveler.nationality"
[10] "Accommodation.type" "Accommodation.cost" "Transportation.type"
[13] "Transportation.cost"
# Number of rows in the dataset (first element of the dimensions).
dim(udaje)[1L]
[1] 139
# Number of columns in the dataset (second element of the dimensions).
dim(udaje)[2L]
[1] 13
# Basic descriptive statistics for every column of the dataset.
udaje %>%
  summary()
Trip.ID Destination Start.date End.date
Min. : 1.0 Length:139 Length:139 Length:139
1st Qu.: 35.5 Class :character Class :character Class :character
Median : 70.0 Mode :character Mode :character Mode :character
Mean : 70.0
3rd Qu.:104.5
Max. :139.0
Duration..days. Traveler.name Traveler.age Traveler.gender
Min. : 5.000 Length:139 Min. :20.00 Length:139
1st Qu.: 7.000 Class :character 1st Qu.:28.00 Class :character
Median : 7.000 Mode :character Median :31.00 Mode :character
Mean : 7.606 Mean :33.18
3rd Qu.: 8.000 3rd Qu.:38.00
Max. :14.000 Max. :60.00
NA's :2 NA's :2
Traveler.nationality Accommodation.type Accommodation.cost Transportation.type
Length:139 Length:139 Length:139 Length:139
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
Transportation.cost
Length:139
Class :character
Mode :character
# Show a sample of selected variables: destination, traveler name and
# traveler age for the first ten trips.
udaje %>%
  head(10) %>%
  select(Destination, Traveler.name, Traveler.age) %>%
  kable(caption = "Ukážka vybraných premenných z datasetu") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed")
  )
Destination | Traveler.name | Traveler.age |
---|---|---|
London, UK | John Smith | 35 |
Phuket, Thailand | Jane Doe | 28 |
Bali, Indonesia | David Lee | 45 |
New York, USA | Sarah Johnson | 29 |
Tokyo, Japan | Kim Nguyen | 26 |
Paris, France | Michael Brown | 42 |
Sydney, Australia | Emily Davis | 33 |
Rio de Janeiro, Brazil | Lucas Santos | 25 |
Amsterdam, Netherlands | Laura Janssen | 31 |
Dubai, United Arab Emirates | Mohammed Ali | 39 |
# Keep only trips longer than 5 days and list the ten most expensive ones.
#
# The cost columns are stored as text in mixed formats ("2000", "$1,000",
# "1200 USD"), so they are parsed to numbers before sorting. The sort key is
# accommodation cost + transportation cost; trips where either cost is
# missing get an NA key, which arrange() places last.
udaje %>%
  filter(Duration..days. > 5) %>%
  arrange(
    desc(
      readr::parse_number(as.character(Accommodation.cost)) +
        readr::parse_number(as.character(Transportation.cost))
    )
  ) %>%
  head(10) %>%
  kable(caption = "Najdrahšie cesty s trvaním nad 5 dní") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Trip.ID | Destination | Start.date | End.date | Duration..days. | Traveler.name | Traveler.age | Traveler.gender | Traveler.nationality | Accommodation.type | Accommodation.cost | Transportation.type | Transportation.cost |
---|---|---|---|---|---|---|---|---|---|---|---|---|
4 | New York, USA | 8/15/2023 | 8/29/2023 | 14 | Sarah Johnson | 29 | Female | British | Hotel | 2000 | Flight | 1000 |
31 | Australia | 8/20/2022 | 9/2/2022 | 13 | Emma Davis | 28 | Female | British | Hotel | $1,000 | Car rental | $500 |
86 | Bali | 8/10/2021 | 8/20/2021 | 11 | Maria Garcia | 42 | Female | Spanish | Resort | 1200 USD | Plane | 700 USD |
89 | London | 11/20/2021 | 11/30/2021 | 11 | James Wilson | 29 | Male | British | Hostel | 300 USD | Plane | 400 USD |
92 | Rome | 3/10/2022 | 3/20/2022 | 11 | Giulia Rossi | 30 | Female | Italian | Hostel | 200 USD | Plane | 350 USD |
93 | Bali | 4/15/2022 | 4/25/2022 | 11 | Putra Wijaya | 33 | Male | Indonesian | Villa | 1500 USD | Car rental | 300 USD |
119 | Sydney, Aus | 5/1/2022 | 5/12/2022 | 11 | Cindy Chen | 26 | Female | Chinese | Airbnb | 800 | Plane | 1000 |
7 | Sydney, Australia | 11/20/2023 | 11/30/2023 | 10 | Emily Davis | 33 | Female | Australian | Hostel | 500 | Flight | 1200 |
18 | Bali | 8/15/2023 | 8/25/2023 | 10 | Michael Chang | 28 | Male | Chinese | Resort | $1,500 | Plane | $700 |
20 | Tokyo | 10/5/2023 | 10/15/2023 | 10 | Kenji Nakamura | 45 | Male | Japanese | Hotel | $1,200 | Plane | $800 |
# Scatterplot of trip duration against accommodation cost, coloured by
# transportation cost.
#
# Both cost columns are read as text ("$1,000", "1200 USD", ...). Plotting
# them as-is yields a discrete axis/legend with one level per distinct
# string, so they are converted to numbers for this plot only (the stored
# data frame is left unchanged).
udaje %>%
  mutate(
    across(
      c(Accommodation.cost, Transportation.cost),
      ~ readr::parse_number(as.character(.x))
    )
  ) %>%
  ggplot(
    aes(
      x = Duration..days.,
      y = Accommodation.cost,
      color = Transportation.cost
    )
  ) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  labs(
    title = "Závislosť medzi dĺžkou cesty a výdavkami",
    x = "Dĺžka cesty (dni)",
    y = "Výdavky ($)"
  )
# Average transportation cost and number of trips per transportation type.
#
# Transportation.cost is stored as text in mixed formats ("1000", "$500",
# "700 USD"); mean() on a character vector returns NA, so the values are
# parsed to numbers first. na.rm = TRUE keeps a single missing cost from
# turning the whole group's average into NA.
udaje %>%
  group_by(Transportation.type) %>%
  summarise(
    Priemerne_naklady = mean(
      readr::parse_number(as.character(Transportation.cost)),
      na.rm = TRUE
    ),
    Pocet_ciest = n()
  ) %>%
  kable(caption = "Priemerné výdavky podľa spôsobu dopravy") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Transportation.type | Priemerne_naklady | Pocet_ciest |
---|---|---|
 | NA | 3 |
Airplane | NA | 5 |
Bus | NA | 6 |
Car | NA | 3 |
Car rental | NA | 13 |
Ferry | NA | 1 |
Flight | NA | 13 |
Plane | NA | 57 |
Subway | NA | 1 |
Train | NA | 37 |
NA
# Add a transportation cost category to every trip.
#
# Transportation.cost is stored as text ("1000", "$500", "700 USD").
# Comparing text with a number compares the values as strings (for example
# "1000" < "500" is TRUE), so the cost is parsed into a temporary numeric
# column, used for the thresholds, and then dropped again. Trips with a
# missing cost get an NA category instead of silently landing in the
# highest bucket.
udaje <- udaje %>%
  mutate(
    naklady_doprava_num = readr::parse_number(
      as.character(Transportation.cost)
    ),
    Cena_kategorie = case_when(
      is.na(naklady_doprava_num) ~ NA_character_,
      naklady_doprava_num < 500 ~ "Nízke náklady",
      naklady_doprava_num < 1500 ~ "Stredné náklady",
      TRUE ~ "Vysoké náklady"
    )
  ) %>%
  select(-naklady_doprava_num)

head(udaje)
# Scatterplot: relationship between trip duration and accommodation cost,
# coloured by transportation cost.
#
# The cost columns are stored as text ("$1,000", "1200 USD", ...), which
# would produce discrete scales with one level per distinct string. They are
# converted to numbers for plotting only; `udaje` itself is not modified.
udaje %>%
  mutate(
    across(
      c(Accommodation.cost, Transportation.cost),
      ~ readr::parse_number(as.character(.x))
    )
  ) %>%
  ggplot(
    aes(
      x = Duration..days.,
      y = Accommodation.cost,
      color = Transportation.cost
    )
  ) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  labs(
    title = "Závislosť medzi dĺžkou cesty a výdavkami",
    x = "Dĺžka cesty (dni)",
    y = "Výdavky ($)"
  )
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Template example chunk: scatterplot of the built-in `cars` dataset.
plot(x = cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.