# Global chunk defaults for the whole document: show the code, but keep
# package start-up messages and warnings out of the rendered output.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

# Načítanie a príprava dát

Dataset obsahuje informácie o jednotlivých cestách – destináciu, trvanie, vek cestujúceho, pohlavie, národnosť, typ ubytovania a náklady na ubytovanie a dopravu.

Numerické premenné použité v datasete sú:

- Duration (days) – dĺžka cesty v dňoch
- Traveler age – vek cestujúceho
- Accommodation cost – cena ubytovania
- Transportation cost – cena dopravy

# readr is attached here; the file itself is read with base R's read.csv().
library(readr)

# Load the semicolon-delimited travel data set into a data frame.
travel <- read.csv(file = "Travel_data.csv", sep = ";")

# Inspect the column names and types.
str(object = travel)
'data.frame':   137 obs. of  13 variables:
 $ Trip.ID             : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Destination         : chr  "London, UK" "Phuket, Thailand" "Bali, Indonesia" "New York, USA" ...
 $ Start.date          : chr  "01/05/2023" "15/06/2023" "01/07/2023" "15/08/2023" ...
 $ End.date            : chr  "08/05/2023" "20/06/2023" "08/07/2023" "29/08/2023" ...
 $ Duration..days.     : int  7 5 7 14 7 5 10 7 7 7 ...
 $ Traveler.name       : chr  "John Smith" "Jane Doe" "David Lee" "Sarah Johnson" ...
 $ Traveler.age        : int  35 28 45 29 26 42 33 25 31 39 ...
 $ Traveler.gender     : chr  "Male" "Female" "Male" "Female" ...
 $ Traveler.nationality: chr  "American" "Canadian" "Korean" "British" ...
 $ Accommodation.type  : chr  "Hotel" "Resort" "Villa" "Hotel" ...
 $ Accommodation.cost  : int  1200 800 1000 2000 700 1500 500 900 1200 2500 ...
 $ Transportation.type : chr  "Flight" "Flight" "Flight" "Flight" ...
 $ Transportation.cost : int  600 500 700 1000 200 800 1200 600 200 800 ...
# Preview the first six rows (the default for head()).
head(travel, n = 6L)

# dplyr supplies select().
library(dplyr)

# Keep only the four numeric columns used in the rest of the analysis.
travel.num <- select(
  travel,
  Duration..days.,
  Traveler.age,
  Accommodation.cost,
  Transportation.cost
)

#Základné štatistiky

# knitr supplies kable(); kableExtra supplies kable_styling().
library(knitr)
library(kableExtra)

# One-row summary: the row count, then the mean and standard deviation of each
# numeric column. Missing values are ignored by mean() and sd().
summary_stats <- summarise(
  travel.num,
  n = n(),
  mean_duration = mean(Duration..days., na.rm = TRUE),
  sd_duration = sd(Duration..days., na.rm = TRUE),
  mean_age = mean(Traveler.age, na.rm = TRUE),
  sd_age = sd(Traveler.age, na.rm = TRUE),
  mean_accommodation = mean(Accommodation.cost, na.rm = TRUE),
  sd_accommodation = sd(Accommodation.cost, na.rm = TRUE),
  mean_transport = mean(Transportation.cost, na.rm = TRUE),
  sd_transport = sd(Transportation.cost, na.rm = TRUE)
)

# Render the summary as a compact, striped table rounded to two decimals.
kable_styling(
  kable(
    summary_stats,
    digits = 2,
    caption = "Základné štatistiky cestovateľských údajov"
  ),
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed")
)
Základné štatistiky cestovateľských údajov
n mean_duration sd_duration mean_age sd_age mean_accommodation sd_accommodation mean_transport sd_transport
137 7.61 1.6 33.18 7.15 1245.11 1337.35 645.18 584.48
NA

# Grafy

## Scatterplot

Graf ukazuje, ako spolu súvisia výdavky na ubytovanie a dopravu — destinácie s vyššími nákladmi na ubytovanie majú často aj vyššie dopravné náklady.

# Attach ggplot2 so that ggplot(), aes(), geom_point(), labs() and
# theme_minimal() are available when the document is run in a fresh session.
library(ggplot2)

# Scatter plot of accommodation cost (x) against transportation cost (y),
# one semi-transparent point per trip.
ggplot(travel, aes(x = Accommodation.cost, y = Transportation.cost)) +
  geom_point(color = "steelblue", size = 3, alpha = 0.7) +
  labs(
    title = "Vzťah medzi nákladmi na ubytovanie a dopravou",
    x = "Náklady na ubytovanie (€)",
    y = "Náklady na dopravu (€)"
  ) +
  theme_minimal()

## Boxplot

Boxplot ukazuje, že hotely a rezorty majú spravidla vyššie mediánové ceny ako hostely či apartmány. Vidno aj prítomnosť niekoľkých extrémnych hodnôt (luxusnejšie pobyty).

# Attach ggplot2 so that ggplot() and its helpers are available when the
# document is run in a fresh session (attaching it again is harmless).
library(ggplot2)

# Box plot of accommodation cost for each accommodation type. The fill colour
# duplicates the x axis, so the legend is hidden.
ggplot(
  travel,
  aes(x = Accommodation.type, y = Accommodation.cost, fill = Accommodation.type)
) +
  geom_boxplot(alpha = 0.7) +
  labs(
    title = "Rozdelenie nákladov na ubytovanie podľa typu",
    x = "Typ ubytovania",
    y = "Cena ubytovania (€)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Testovanie hypotéz

## T-test: Rozdiel v nákladoch na ubytovanie medzi mužmi a ženami

Test zisťuje, či existuje štatisticky významný rozdiel v nákladoch na ubytovanie medzi pohlaviami.

# Two-sample t-test comparing accommodation cost between travelers recorded as
# "Male" and those recorded as "Female". With the default arguments t.test()
# runs the two-sided Welch (unequal-variance) variant.
# NOTE(review): rows whose gender is neither "Male" nor "Female" contribute to
# neither sample.
t.test(
  # First sample (x): accommodation cost of male travelers.
  travel$Accommodation.cost[travel$Traveler.gender == "Male"],
  # Second sample (y): accommodation cost of female travelers.
  travel$Accommodation.cost[travel$Traveler.gender == "Female"]
)

    Welch Two Sample t-test

data:  travel$Accommodation.cost[travel$Traveler.gender == "Male"] and travel$Accommodation.cost[travel$Traveler.gender == "Female"]
t = -0.23362, df = 126.58, p-value = 0.8157
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -504.1316  397.6667
sample estimates:
mean of x mean of y 
 1217.910  1271.143 

## ANOVA: Rozdiel v nákladoch na dopravu podľa typu dopravy

ANOVA testuje, či sa priemerné náklady na dopravu líšia medzi rôznymi typmi dopravy (vlak, lietadlo, autobus atď.).

# One-way ANOVA: does the mean transportation cost differ between
# transportation types?
anova_result <- aov(
  formula = Transportation.cost ~ Transportation.type,
  data = travel
)

# Print the ANOVA table (degrees of freedom, sums of squares, F test).
summary(object = anova_result)
                     Df   Sum Sq Mean Sq F value Pr(>F)    
Transportation.type   8 31339350 3917419   33.66 <2e-16 ***
Residuals           127 14778320  116365                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
1 observation deleted due to missingness

## Lineárna regresia: Predikcia nákladov na dopravu

Model skúma, ako dĺžka pobytu, vek cestovateľa a náklady na ubytovanie ovplyvňujú výšku dopravných nákladov.

# Linear regression of transportation cost on trip duration, traveler age and
# accommodation cost.
model <- lm(
  formula = Transportation.cost ~ Duration..days. + Traveler.age + Accommodation.cost,
  data = travel
)

# Print coefficients, residual summary and goodness-of-fit statistics.
summary(object = model)

Call:
lm(formula = Transportation.cost ~ Duration..days. + Traveler.age + 
    Accommodation.cost, data = travel)

Residuals:
     Min       1Q   Median       3Q      Max 
-1113.24  -264.22   -69.84   226.15  1121.18 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -185.37312  224.60543  -0.825   0.4107    
Duration..days.      38.91657   19.51562   1.994   0.0482 *  
Traveler.age          2.91813    4.35678   0.670   0.5042    
Accommodation.cost    0.34921    0.02327  15.005   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 358.8 on 132 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.6315,    Adjusted R-squared:  0.6232 
F-statistic: 75.41 on 3 and 132 DF,  p-value: < 2.2e-16

# Heatmapa korelačnej matice

Heatmapa vizualizuje korelácie medzi numerickými premennými. Napríklad silná korelácia medzi nákladmi na ubytovanie a dopravou môže naznačovať luxusnejší štýl cestovania.

# Install corrplot only when it is missing. An unconditional install.packages()
# call re-installs the package on every run and aborts with
# "Updating loaded packages" once corrplot is already loaded in the session.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library(corrplot)

# Pairwise correlations between the numeric columns; rows containing any
# missing value are dropped before the correlations are computed.
cor_matrix <- cor(travel.num, use = "complete.obs")

# Colour-coded heatmap of the upper triangle, with black variable labels
# rotated by 45 degrees.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  tl.col = "black",
  tl.srt = 45
)

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Template example chunk: scatter plot of the built-in `cars` data set.
plot(datasets::cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiUHLDoWFjYSBzIGTDoXRhbWkgLSBncmFmeSwgdGFidcS+a3kiCmF1dGhvcjogIkJhcmJvcmEgQ2FwZWtvdsOhIgpkYXRlOiAiT2t0w7NiZXIgMjAyNSIKb3V0cHV0OiAKICBodG1sX25vdGVib29rOgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRoZW1lOiB1bml0ZWQKICAgIGhpZ2hsaWdodDogdGFuZ28KLS0tCmBgYHtyfQprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgICBlY2hvID0gVFJVRSwKICAgIG1lc3NhZ2UgPSBGQUxTRSwKICAgIHdhcm5pbmcgPSBGQUxTRQopCmBgYAoKI05hxI3DrXRhbmllIGEgcHLDrXByYXZhIGTDoXQKRGF0YXNldCBvYnNhaHVqZSBpbmZvcm3DoWNpZSBvIGplZG5vdGxpdsO9Y2ggY2VzdMOhY2gg4oCTIGRlc3RpbsOhY2l1LCB0cnZhbmllLCB2ZWsgY2VzdHVqw7pjZWhvLCBwb2hsYXZpZSwgbsOhcm9kbm9zxaUsIHR5cCB1Ynl0b3ZhbmlhIGEgbsOha2xhZHkgbmEgdWJ5dG92YW5pZSBhIGRvcHJhdnUuCk51bWVyaWNrw6kgcHJlbWVubsOpIHBvdcW+aXTDqSB2IGRhdGFzZXRlIHPDujoKRHVyYXRpb24gKGRheXMpIOKAkyBkxLrFvmthIGNlc3R5IHYgZMWIb2NoClRyYXZlbGVyIGFnZSDigJMgdmVrIGNlc3R1asO6Y2VobwpBY2NvbW1vZGF0aW9uIGNvc3Qg4oCTIGNlbmEgdWJ5dG92YW5pYQpUcmFuc3BvcnRhdGlvbiBjb3N0IOKAkyBjZW5hIGRvcHJhdnkKYGBge3J9CmxpYnJhcnkocmVhZHIpIAp0cmF2ZWwgPC0gcmVhZC5jc3YoIlRyYXZlbF9kYXRhLmNzdiIsIHNlcCA9ICI7IikKCiNLb250cm9sYSDFoXRydWt0w7pyeQpzdHIodHJhdmVsKQpoZWFkKHRyYXZlbCkKCiNQcmV2b2QgdHlwb3YgcHJlbWVubsO9Y2gKbGlicmFyeShkcGx5cikKCiNWw71iZXIgbnVtZXJpY2vDvWNoIHByZW1lbm7DvWNoCnRyYXZlbC5udW0gPC0gdHJhdmVsICU+JQogIHNlbGVjdChEdXJhdGlvbi4uZGF5cy4sIFRyYXZlbGVyLmFnZSwgQWNjb21tb2RhdGlvbi5jb3N0LCBUcmFuc3BvcnRhdGlvbi5jb3N0KQoKYGBgCgojWsOha2xhZG7DqSDFoXRhdGlzdGlreQpgYGB7cn0KbGlicmFyeShrbml0cikKbGlicmFyeShrYWJsZUV4dHJhKQoKc3VtbWFyeV9zdGF0cyA8LSB0cmF2ZWwubnVtICU+JQogIHN1bW1hcmlzZSgKICAgIG4gPSBuKCksCiAgICBtZWFuX2R1cmF0aW9uID0gbWVhbihEdXJhdGlvbi4uZGF5cy4sIG5hLnJtID0gVFJVRSksCiAgICBzZF9kdXJhdGlvbiA9IHNkKER1cmF0aW9uLi5kYXlzLiwgbmEucm0gPSBUUlVFKSwKICAgIG1lYW5fYWdlID0gbWVhbihUcmF2ZWxlci5hZ2UsIG5hLnJtID0gVFJVRSksCiAgICBzZF9hZ2UgPSBzZChUcmF2ZWxlci5hZ2UsIG5hLnJtID0gVFJVRSksCiAgICBtZWFuX2FjY29tbW9kYXRpb24gPSBtZWFuKEFjY29tbW9kYXRpb24uY29zdCwgbmEucm0gPSBUUlVFKSwKICAgIHNkX2FjY29tbW9kYXRpb24gPSBzZChBY2NvbW1vZGF0aW9uLmNvc3QsIG5hLnJtID0gVFJVRSksCiAgICBtZWFuX3RyYW5zcG9ydCA9IG1lYW4oVHJhbnNwb3J0YXRp
b24uY29zdCwgbmEucm0gPSBUUlVFKSwKICAgIHNkX3RyYW5zcG9ydCA9IHNkKFRyYW5zcG9ydGF0aW9uLmNvc3QsIG5hLnJtID0gVFJVRSkKICApCgprYWJsZShzdW1tYXJ5X3N0YXRzLCBkaWdpdHMgPSAyLCBjYXB0aW9uID0gIlrDoWtsYWRuw6kgxaF0YXRpc3Rpa3kgY2VzdG92YXRlxL5za8O9Y2ggw7pkYWpvdiIpICU+JQogIGthYmxlX3N0eWxpbmcoZnVsbF93aWR0aCA9IEZBTFNFLCBib290c3RyYXBfb3B0aW9ucyA9IGMoInN0cmlwZWQiLCAiaG92ZXIiLCAiY29uZGVuc2VkIikpCgpgYGAKCiNHcmFmeQojI1NjYXR0ZXJwbG90CkdyYWYgdWthenVqZSwgYWtvIHNwb2x1IHPDunZpc2lhIHbDvWRhdmt5IG5hIHVieXRvdmFuaWUgYSBkb3ByYXZ1IOKAlCBkZXN0aW7DoWNpZSBzIHZ5xaHFocOtbWkgbsOha2xhZG1pIG5hIHVieXRvdmFuaWUgbWFqw7ogxI1hc3RvIGFqIHZ5xaHFoWllIGRvcHJhdm7DqSBuw6FrbGFkeS4KYGBge3J9CmdncGxvdCh0cmF2ZWwsIGFlcyh4ID0gQWNjb21tb2RhdGlvbi5jb3N0LCB5ID0gVHJhbnNwb3J0YXRpb24uY29zdCkpICsKZ2VvbV9wb2ludChjb2xvciA9ICJzdGVlbGJsdWUiLCBzaXplID0gMywgYWxwaGEgPSAwLjcpICsKbGFicyh0aXRsZSA9ICJWesWlYWggbWVkemkgbsOha2xhZG1pIG5hIHVieXRvdmFuaWUgYSBkb3ByYXZvdSIsCnggPSAiTsOha2xhZHkgbmEgdWJ5dG92YW5pZSAo4oKsKSIsCnkgPSAiTsOha2xhZHkgbmEgZG9wcmF2dSAo4oKsKSIpICsKdGhlbWVfbWluaW1hbCgpCmBgYAoKIyNCb3hwbG90CkJveHBsb3QgdWthenVqZSwgxb5lIGhvdGVseSBhIHJlem9ydHkgbWFqw7ogc3ByYXZpZGxhIHZ5xaHFoWllIG1lZGnDoW5vdsOpIGNlbnkgYWtvIGhvc3RlbHkgxI1pIGFwYXJ0bcOhbnkuIFZpZG5vIGFqIHByw610b21ub3PFpSBuaWVrb8S+a8O9Y2ggZXh0csOpbW55Y2ggaG9kbsO0dCAobHV4dXNuZWrFoWllIHBvYnl0eSkuCmBgYHtyfQpnZ3Bsb3QodHJhdmVsLCBhZXMoeCA9IEFjY29tbW9kYXRpb24udHlwZSwgeSA9IEFjY29tbW9kYXRpb24uY29zdCwgZmlsbCA9IEFjY29tbW9kYXRpb24udHlwZSkpICsKZ2VvbV9ib3hwbG90KGFscGhhID0gMC43KSArCmxhYnModGl0bGUgPSAiUm96ZGVsZW5pZSBuw6FrbGFkb3YgbmEgdWJ5dG92YW5pZSBwb2TEvmEgdHlwdSIsCnggPSAiVHlwIHVieXRvdmFuaWEiLAp5ID0gIkNlbmEgdWJ5dG92YW5pYSAo4oKsKSIpICsKdGhlbWVfbWluaW1hbCgpICsKdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKQpgYGAKI1Rlc3RvdmFuaWUgaHlwb3TDqXoKIyNULXRlc3Q6IFJvemRpZWwgdiBuw6FrbGFkb2NoIG5hIHVieXRvdmFuaWUgbWVkemkgbXXFvm1pIGEgxb5lbmFtaQpUZXN0IHppc8WldWplLCDEjWkgZXhpc3R1amUgxaF0YXRpc3RpY2t5IHbDvXpuYW1uw70gcm96ZGllbCB2IG7DoWtsYWRvY2ggbmEgdWJ5dG92YW5pZSBtZWR6aSBwb2hsYXZpYW1pLgpgYGB7cn0KdC50ZXN0KAogIHRyYXZlbCRBY2NvbW1vZGF0aW9uLmNvc3RbdHJhdmVs
JFRyYXZlbGVyLmdlbmRlciA9PSAiTWFsZSJdLAogIHRyYXZlbCRBY2NvbW1vZGF0aW9uLmNvc3RbdHJhdmVsJFRyYXZlbGVyLmdlbmRlciA9PSAiRmVtYWxlIl0KKQpgYGAKCiMjQU5PVkE6IFJvemRpZWwgdiBuw6FrbGFkb2NoIG5hIGRvcHJhdnUgcG9kxL5hIHR5cHUgZG9wcmF2eQpBTk9WQSB0ZXN0dWplLCDEjWkgc2EgcHJpZW1lcm7DqSBuw6FrbGFkeSBuYSBkb3ByYXZ1IGzDrcWhaWEgbWVkemkgcsO0em55bWkgdHlwbWkgZG9wcmF2eSAodmxhaywgbGlldGFkbG8sIGF1dG9idXMgYXTEjy4pLgpgYGB7cn0KYW5vdmFfcmVzdWx0IDwtIGFvdihUcmFuc3BvcnRhdGlvbi5jb3N0IH4gVHJhbnNwb3J0YXRpb24udHlwZSwgZGF0YSA9IHRyYXZlbCkKc3VtbWFyeShhbm92YV9yZXN1bHQpCmBgYAoKIyNMaW5lw6FybmEgcmVncmVzaWE6IFByZWRpa2NpYSBuw6FrbGFkb3YgbmEgZG9wcmF2dQpNb2RlbCBza8O6bWEsIGFrbyBkxLrFvmthIHBvYnl0dSwgdmVrIGNlc3RvdmF0ZcS+YSBhIG7DoWtsYWR5IG5hIHVieXRvdmFuaWUgb3ZwbHl2xYh1asO6IHbDvcWha3UgZG9wcmF2bsO9Y2ggbsOha2xhZG92LgpgYGB7cn0KbW9kZWwgPC0gbG0oVHJhbnNwb3J0YXRpb24uY29zdCB+IER1cmF0aW9uLi5kYXlzLiArIFRyYXZlbGVyLmFnZSArIEFjY29tbW9kYXRpb24uY29zdCwgZGF0YSA9IHRyYXZlbCkKc3VtbWFyeShtb2RlbCkKYGBgCgojSGVhdG1hcGEga29yZWxhxI1uZWogbWF0aWNlCkhlYXRtYXBhIHZpenVhbGl6dWplIGtvcmVsw6FjaWUgbWVkemkgbnVtZXJpY2vDvW1pIHByZW1lbm7DvW1pLiBOYXByw61rbGFkIHNpbG7DoSBrb3JlbMOhY2lhIG1lZHppIG7DoWtsYWRtaSBuYSB1Ynl0b3ZhbmllIGEgZG9wcmF2b3UgbcO0xb5lIG5hem5hxI1vdmHFpSBsdXh1c25lasWhw60gxaF0w71sIGNlc3RvdmFuaWEuCmBgYHtyfQppbnN0YWxsLnBhY2thZ2VzKCJjb3JycGxvdCIpCmxpYnJhcnkoY29ycnBsb3QpCgpjb3JfbWF0cml4IDwtIGNvcih0cmF2ZWwubnVtLCB1c2UgPSAiY29tcGxldGUub2JzIikKY29ycnBsb3QoY29yX21hdHJpeCwgbWV0aG9kID0gImNvbG9yIiwgdHlwZSA9ICJ1cHBlciIsIHRsLmNvbCA9ICJibGFjayIsIHRsLnNydCA9IDQ1KQpgYGAKCgpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiAKClRyeSBleGVjdXRpbmcgdGhpcyBjaHVuayBieSBjbGlja2luZyB0aGUgKlJ1biogYnV0dG9uIHdpdGhpbiB0aGUgY2h1bmsgb3IgYnkgcGxhY2luZyB5b3VyIGN1cnNvciBpbnNpZGUgaXQgYW5kIHByZXNzaW5nICpDdHJsK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KcGxvdChjYXJzKQpgYGAKCkFkZCBhIG5ldyBjaHVuayBieSBjbGlja2luZyB0aGUgKkluc2VydCBDaHVuayogYnV0dG9uIG9uIHRoZSB0b29sYmFyIG9yIGJ5IHBy
ZXNzaW5nICpDdHJsK0FsdCtJKi4KCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ3RybCtTaGlmdCtLKiB0byBwcmV2aWV3IHRoZSBIVE1MIGZpbGUpLgoKVGhlIHByZXZpZXcgc2hvd3MgeW91IGEgcmVuZGVyZWQgSFRNTCBjb3B5IG9mIHRoZSBjb250ZW50cyBvZiB0aGUgZWRpdG9yLiBDb25zZXF1ZW50bHksIHVubGlrZSAqS25pdCosICpQcmV2aWV3KiBkb2VzIG5vdCBydW4gYW55IFIgY29kZSBjaHVua3MuIEluc3RlYWQsIHRoZSBvdXRwdXQgb2YgdGhlIGNodW5rIHdoZW4gaXQgd2FzIGxhc3QgcnVuIGluIHRoZSBlZGl0b3IgaXMgZGlzcGxheWVkLgo=