1. Načítanie dát
Dataset: DailyDelhiClimateTest.csv
# Location of the raw CSV, relative to the working directory.
data_path <- "DailyDelhiClimateTest.csv"
# Read the file, keeping text columns as character vectors, then print the
# structure of the resulting data frame.
df <- read.csv(file = data_path, stringsAsFactors = FALSE)
str(object = df)
'data.frame': 114 obs. of 5 variables:
$ date : chr "2017-01-01" "2017-01-02" "2017-01-03" "2017-01-04" ...
$ meantemp : num 15.9 18.5 17.1 18.7 18.4 ...
$ humidity : num 85.9 77.2 81.9 70 74.9 ...
$ wind_speed : num 2.74 2.89 4.02 4.54 3.3 ...
$ meanpressure: num 59 1018 1018 1016 1014 ...
2. Predspracovanie
- Skontrolujeme chýbajúce hodnoty a typy stĺpcov.
- Vytvoríme premenné rok, mesiac, deň a deň v týždni.
# Count missing values per column, listed from most to fewest.
df %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "missing") %>%
  arrange(desc(missing))

# `date` is read in as character (see the str() output). Parse it to Date
# first so that plots get a continuous time axis instead of a discrete one,
# then derive the calendar parts from the parsed value.
df <- df %>%
  mutate(
    date = as.Date(date, format = "%Y-%m-%d"),
    year = year(date),
    month = month(date, label = TRUE, abbr = TRUE),
    day = day(date),
    weekday = wday(date, label = TRUE)
  )
3. Exploratory data analysis (EDA)
3.1 Trendy pri teplote (avg temp)
# Line chart of the daily mean temperature against the observation date.
ggplot(data = df, mapping = aes(x = date, y = meantemp)) +
  geom_line() +
  theme_minimal() +
  labs(
    title = 'Denný priemer teploty v Dillí (test set)',
    x = 'Dátum',
    y = 'Mean Temperature (°C)'
  )

3.2 Sezónnosť podľa mesiaca
# Summarise the daily mean temperature within each month (mean, standard
# deviation, number of days) and draw one bar per month for the mean.
df %>%
  group_by(month) %>%
  summarise(
    mean_meantemp = mean(meantemp, na.rm = TRUE),
    sd_meantemp = sd(meantemp, na.rm = TRUE),
    n = n()
  ) %>%
  ggplot(mapping = aes(x = month, y = mean_meantemp)) +
  geom_col() +
  labs(
    title = 'Priemerná mean teplota podľa mesiaca',
    x = 'Mesiac',
    y = 'Priemer (°C)'
  ) +
  theme_minimal()

3.3 Závislosti medzi premennými
library(GGally)
# Keep every numeric column except `year`, then draw the pairwise
# scatterplot / correlation matrix for those columns.
numeric_cols <- select(df, where(is.numeric), -year)
GGally::ggpairs(data = numeric_cols)

4. Jednoduché modelovanie
4.1 Lineárny model — predikcia meantemp z vlhkosti, vetra a tlaku
# Fit a linear model of the daily mean temperature on humidity, wind speed
# and air pressure. The pressure column in `df` is called `meanpressure`.
# The bare name `pressure` is not a column of `df`, so R resolved it to
# another object (the built-in `datasets::pressure` data frame) and lm()
# stopped with "invalid type (list) for variable 'pressure'".
lm1 <- lm(meantemp ~ humidity + wind_speed + meanpressure, data = df)
4.2 Časový rad — dekompozícia meantemp
5. Krátke závery
Bonus: uloženie upraveného datasetu
LS0tCnRpdGxlOiAiTGluZcOhcm5hIHJlZ3Jlc2lhIgphdXRob3I6ICJNw6FyaWEgTWF0w7rFoW92w6EiCmRhdGU6ICIyMDI1LTExLTA2IgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0aGVtZTogY29zbW8KICAgIGhpZ2hsaWdodDogemVuYnVybgogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIGRmX3ByaW50OiBwYWdlZAplZGl0b3Jfb3B0aW9uczoKICBtYXJrZG93bjoKICAgIHdyYXA6IDcyCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UsIGZpZy53aWR0aD04LCBmaWcuaGVpZ2h0PTUpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGZvcmVjYXN0KQpgYGAKCiMgMS4gTmHEjcOtdGFuaWUgZMOhdApEYXRhc2V0OiBgRGFpbHlEZWxoaUNsaW1hdGVUZXN0LmNzdmAKCmBgYHtyIGxvYWQtZGF0YX0KZGF0YV9wYXRoIDwtICJEYWlseURlbGhpQ2xpbWF0ZVRlc3QuY3N2IgpkZiA8LSByZWFkLmNzdihkYXRhX3BhdGgsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKc3RyKGRmKQpoZWFkKGRmKQpgYGAKCiMgMi4gUHJlZHNwcmFjb3ZhbmllCi0gU2tvbnRyb2x1amVtZSBjaMO9YmFqw7pjZSBob2Rub3R5IGEgdHlweSBzdMS6cGNvdi4KLSBWeXR2b3LDrW1lIHJvaywgbWVzaWFjIGEgZGXFiC1vZi13ZWVrIHByZW1lbm7DqS4KCmBgYHtyIHByZXByb2Nlc3N9CiMgUHJlaMS+YWQgY2jDvWJhasO6Y2ljaCBob2Ruw7R0Cm1hcF9kZihkZiwgfiBzdW0oaXMubmEoLikpKSAlPiUgZ2F0aGVyKGtleT0ndmFyaWFibGUnLCB2YWx1ZT0nbWlzc2luZycpICU+JSBhcnJhbmdlKGRlc2MobWlzc2luZykpCgojIENvbnZlcnQgdG8gdGlkeSBkYXRlIHBhcnRzCmRmIDwtIGRmICU+JSBtdXRhdGUoeWVhciA9IHllYXIoZGF0ZSksIG1vbnRoID0gbW9udGgoZGF0ZSwgbGFiZWwgPSBUUlVFLCBhYmJyID0gVFJVRSksCiAgICAgICAgICAgICAgICAgICAgZGF5ID0gZGF5KGRhdGUpLCB3ZWVrZGF5ID0gd2RheShkYXRlLCBsYWJlbCA9IFRSVUUpKQpgYGAKCiMgMy4gRXhwbG9yYXRvcnkgZGF0YSBhbmFseXNpcyAoRURBKQoKIyMgMy4xIFRyZW5keSBwcmkgdGVwbG90ZSAoYXZnIHRlbXApCmBgYHtyIHRlbXAtdHJlbmQsIGZpZy5oZWlnaHQ9NH0KZGYgJT4lIGdncGxvdChhZXMoeD1kYXRlLCB5PW1lYW50ZW1wKSkgKwogIGdlb21fbGluZSgpICsKICBsYWJzKHRpdGxlPSdEZW5uw70gcHJpZW1lciB0ZXBsb3R5IHYgRGlsbMOtICh0ZXN0IHNldCknLCB4PSdEw6F0dW0nLCB5PSdNZWFuIFRlbXBlcmF0dXJlICjCsEMpJykgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCiMjIDMuMiBTZXrDs25ub3PFpSBwb2TEvmEgbWVzaWFjYQpgYGB7ciBieS1tb250aH0KZGYgJT4lIGdyb3VwX2J5KG1vbnRo
KSAlPiUgc3VtbWFyaXplKG1lYW5fbWVhbnRlbXAgPSBtZWFuKG1lYW50ZW1wLCBuYS5ybT1UUlVFKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNkX21lYW50ZW1wID0gc2QobWVhbnRlbXAsIG5hLnJtPVRSVUUpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbiA9IG4oKSkgJT4lCiAgZ2dwbG90KGFlcyh4PW1vbnRoLCB5PW1lYW5fbWVhbnRlbXApKSArCiAgZ2VvbV9jb2woKSArIGxhYnModGl0bGU9J1ByaWVtZXJuw6EgbWVhbiB0ZXBsb3RhIHBvZMS+YSBtZXNpYWNhJywgeD0nTWVzaWFjJywgeT0nUHJpZW1lciAowrBDKScpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgojIyAzLjMgWsOhdmlzbG9zdGkgbWVkemkgcHJlbWVubsO9bWkKYGBge3IgcGFpcnN9CmxpYnJhcnkoR0dhbGx5KQpudW1lcmljX2NvbHMgPC0gZGYgJT4lIHNlbGVjdF9pZihpcy5udW1lcmljKSAlPiUgc2VsZWN0KC15ZWFyKQpHR2FsbHk6OmdncGFpcnMobnVtZXJpY19jb2xzKQpgYGAKCiMgNC4gSmVkbm9kdWNow6kgbW9kZWxvdmFuaWUKCiMjIDQuMSBMaW5lw6FybnkgbW9kZWwg4oCUIHByZWRpa2NpYSBtZWFudGVtcCB6IHZsaGtvc3RpIGEgdmV0cmEKYGBge3IgbG0tbW9kZWx9CmxtMSA8LSBsbShtZWFudGVtcCB+IGh1bWlkaXR5ICsgd2luZF9zcGVlZCArIHByZXNzdXJlLCBkYXRhID0gZGYpCnN1bW1hcnkobG0xKQpwbG90KGxtMSwgd2hpY2g9MToyKQpgYGAKCiMjIDQuMiDEjGFzb3bDoSByYWRhIOKAlCBkZWtvbXBvesOtY2lhIG1lYW50ZW1wCmBgYHtyIHRzLWRlY29tcG9zZX0KIyBDcmVhdGUgdGltZSBzZXJpZXMgKGRhaWx5KS4gSWYgZGF0ZXMgYXJlIG5vdCBjb250aWd1b3VzLCB3ZSBzdGlsbCB1c2UgdHMgZm9yIHZpc3VhbGl6YXRpb24uCnRzX3RlbXAgPC0gdHMoZGYkbWVhbnRlbXAsIGZyZXF1ZW5jeSA9IDM2NSwgc3RhcnQgPSBjKG1pbihkZiR5ZWFyKSwgeWRheShtaW4oZGYkZGF0ZSkpKSkKIyBJZiB0aGUgc2VyaWVzIGxlbmd0aCBpcyBsZXNzIHRoYW4gYSBmdWxsIHllYXIsIGZyZXF1ZW5jeSBzdGlsbCBoZWxwcyB2aXN1YWxpemUgc2Vhc29uYWxpdHkuCmRlY29tcCA8LSBzdGwodHNfdGVtcCwgcy53aW5kb3c9J3BlcmlvZGljJykKcGxvdChkZWNvbXApCmBgYAoKIyA1LiBLcsOhdGtlIHrDoXZlcnkKYGBge3IgY29uY2x1c2lvbnMsIGVjaG89RkFMU0V9CmNhdCgnWmhybnV0aWU6IFxuLSBEYXRhc2V0IG9ic2FodWplJywgbnJvdyhkZiksICdyaWFka292LiBcbi0gVmlkaXRlxL5uw6kgc8O6IHNlesOzbm5lIHZ6b3J5IHYgdGVwbG90ZSBwb2TEvmEgbWVzaWFjb3YuIFxuLSBMaW5lw6FybnkgbW9kZWwgdWvDoXphbCAocG96cmkgc8O6aHJuKSwga3RvcsOpIHByZW1lbm7DqSBzw7ogc2lnbmlmaWthbnRuw6kgcHJlIHByZWRpa2NpdSBtZWFudGVtcC4nKQpgYGAKCiMgQm9udXM6IHVsb8W+ZW5pZSB1cHJhdmVuw6lobyBkYXRhc2V0dQpgYGB7ciBzYXZlfQp3cml0ZS5jc3YoZGYsICdEYWlseURlbGhp
Q2xpbWF0ZVRlc3RfcHJlcHJvY2Vzc2VkLmNzdicsIHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCi0tLQ==