1. Načítanie dát

Dataset: DailyDelhiClimateTest.csv

# Location of the input CSV, relative to the working directory.
data_path <- "DailyDelhiClimateTest.csv"

# Read the file, keeping text columns as plain character vectors, and
# print a compact overview of the resulting column types.
df <- read.csv(file = data_path, stringsAsFactors = FALSE)
str(object = df)
'data.frame':   114 obs. of  5 variables:
 $ date        : chr  "2017-01-01" "2017-01-02" "2017-01-03" "2017-01-04" ...
 $ meantemp    : num  15.9 18.5 17.1 18.7 18.4 ...
 $ humidity    : num  85.9 77.2 81.9 70 74.9 ...
 $ wind_speed  : num  2.74 2.89 4.02 4.54 3.3 ...
 $ meanpressure: num  59 1018 1018 1016 1014 ...
# Preview the first six rows of the loaded data.
head(df, n = 6L)

2. Predspracovanie

  • Skontrolujeme chýbajúce hodnoty a typy stĺpcov.
  • Vytvoríme premenné pre rok, mesiac, deň v mesiaci a deň v týždni.
# Overview of missing values: one row per column, sorted from the most to
# the fewest missing entries. pivot_longer() replaces the superseded
# gather().
df %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(
    everything(),
    names_to = "variable",
    values_to = "missing"
  ) %>%
  arrange(desc(missing))

# Derive calendar parts from the date.
# read.csv() leaves `date` as character, so it is parsed to Date first;
# the lubridate accessors and any later date axis then work on real dates
# instead of strings. as.Date() leaves an existing Date unchanged, so the
# chunk can be re-run safely.
df <- df %>%
  mutate(
    date = as.Date(date),
    year = year(date),
    month = month(date, label = TRUE, abbr = TRUE),
    day = day(date),
    weekday = wday(date, label = TRUE)
  )

3. Exploratory data analysis (EDA)

3.1 Trend dennej priemernej teploty

# Line chart of the daily mean temperature over time.
# `date` comes out of read.csv() as character; with a discrete x every
# date forms its own group and geom_line() has nothing to connect.
# as.Date() gives a continuous date axis (and is a no-op when the column
# is already a Date).
df %>%
  ggplot(aes(x = as.Date(date), y = meantemp)) +
  geom_line() +
  labs(
    title = 'Denný priemer teploty v Dillí (test set)',
    x = 'Dátum',
    y = 'Mean Temperature (°C)'
  ) +
  theme_minimal()

3.2 Sezónnosť podľa mesiaca

# Seasonality by month: compute the mean, standard deviation and row count
# of meantemp per month, then draw the monthly means as a bar chart.
df %>%
  group_by(month) %>%
  summarize(
    mean_meantemp = mean(meantemp, na.rm = TRUE),
    sd_meantemp = sd(meantemp, na.rm = TRUE),
    n = n()
  ) %>%
  ggplot(aes(x = month, y = mean_meantemp)) +
  geom_col() +
  labs(
    title = 'Priemerná mean teplota podľa mesiaca',
    x = 'Mesiac',
    y = 'Priemer (°C)'
  ) +
  theme_minimal()

3.3 Závislosti medzi premennými

# Pairwise scatterplots and correlations of the numeric columns.
library(GGally)

# Keep every numeric column except `year`.
# select(where(...)) replaces the superseded select_if().
numeric_cols <- df %>%
  select(where(is.numeric), -year)

GGally::ggpairs(numeric_cols)

4. Jednoduché modelovanie

4.1 Lineárny model — predikcia meantemp z vlhkosti, rýchlosti vetra a tlaku

# Linear model: daily mean temperature explained by humidity, wind speed
# and air pressure.
# The pressure column in `df` is called `meanpressure`. The bare name
# `pressure` is not a column of `df`, so lm() resolved it to the built-in
# `pressure` data frame from the datasets package and stopped with
# "invalid type (list) for variable 'pressure'".
lm1 <- lm(meantemp ~ humidity + wind_speed + meanpressure, data = df)

4.2 Časový rad — dekompozícia meantemp

5. Krátke závery

Bonus: uloženie upraveného datasetu


LS0tCnRpdGxlOiAiTGluZcOhcm5hIHJlZ3Jlc2lhIgphdXRob3I6ICJNw6FyaWEgTWF0w7rFoW92w6EiCmRhdGU6ICIyMDI1LTExLTA2IgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICB0aGVtZTogY29zbW8KICAgIGhpZ2hsaWdodDogemVuYnVybgogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIGRmX3ByaW50OiBwYWdlZAplZGl0b3Jfb3B0aW9uczoKICBtYXJrZG93bjoKICAgIHdyYXA6IDcyCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UsIGZpZy53aWR0aD04LCBmaWcuaGVpZ2h0PTUpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGZvcmVjYXN0KQpgYGAKCiMgMS4gTmHEjcOtdGFuaWUgZMOhdApEYXRhc2V0OiBgRGFpbHlEZWxoaUNsaW1hdGVUZXN0LmNzdmAKCmBgYHtyIGxvYWQtZGF0YX0KZGF0YV9wYXRoIDwtICJEYWlseURlbGhpQ2xpbWF0ZVRlc3QuY3N2IgpkZiA8LSByZWFkLmNzdihkYXRhX3BhdGgsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKc3RyKGRmKQpoZWFkKGRmKQpgYGAKCiMgMi4gUHJlZHNwcmFjb3ZhbmllCi0gU2tvbnRyb2x1amVtZSBjaMO9YmFqw7pjZSBob2Rub3R5IGEgdHlweSBzdMS6cGNvdi4KLSBWeXR2b3LDrW1lIHJvaywgbWVzaWFjIGEgZGXFiC1vZi13ZWVrIHByZW1lbm7DqS4KCmBgYHtyIHByZXByb2Nlc3N9CiMgUHJlaMS+YWQgY2jDvWJhasO6Y2ljaCBob2Ruw7R0Cm1hcF9kZihkZiwgfiBzdW0oaXMubmEoLikpKSAlPiUgZ2F0aGVyKGtleT0ndmFyaWFibGUnLCB2YWx1ZT0nbWlzc2luZycpICU+JSBhcnJhbmdlKGRlc2MobWlzc2luZykpCgojIENvbnZlcnQgdG8gdGlkeSBkYXRlIHBhcnRzCmRmIDwtIGRmICU+JSBtdXRhdGUoeWVhciA9IHllYXIoZGF0ZSksIG1vbnRoID0gbW9udGgoZGF0ZSwgbGFiZWwgPSBUUlVFLCBhYmJyID0gVFJVRSksCiAgICAgICAgICAgICAgICAgICAgZGF5ID0gZGF5KGRhdGUpLCB3ZWVrZGF5ID0gd2RheShkYXRlLCBsYWJlbCA9IFRSVUUpKQpgYGAKCiMgMy4gRXhwbG9yYXRvcnkgZGF0YSBhbmFseXNpcyAoRURBKQoKIyMgMy4xIFRyZW5keSBwcmkgdGVwbG90ZSAoYXZnIHRlbXApCmBgYHtyIHRlbXAtdHJlbmQsIGZpZy5oZWlnaHQ9NH0KZGYgJT4lIGdncGxvdChhZXMoeD1kYXRlLCB5PW1lYW50ZW1wKSkgKwogIGdlb21fbGluZSgpICsKICBsYWJzKHRpdGxlPSdEZW5uw70gcHJpZW1lciB0ZXBsb3R5IHYgRGlsbMOtICh0ZXN0IHNldCknLCB4PSdEw6F0dW0nLCB5PSdNZWFuIFRlbXBlcmF0dXJlICjCsEMpJykgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCiMjIDMuMiBTZXrDs25ub3PFpSBwb2TEvmEgbWVzaWFjYQpgYGB7ciBieS1tb250aH0KZGYgJT4lIGdyb3VwX2J5KG1vbnRo
KSAlPiUgc3VtbWFyaXplKG1lYW5fbWVhbnRlbXAgPSBtZWFuKG1lYW50ZW1wLCBuYS5ybT1UUlVFKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNkX21lYW50ZW1wID0gc2QobWVhbnRlbXAsIG5hLnJtPVRSVUUpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbiA9IG4oKSkgJT4lCiAgZ2dwbG90KGFlcyh4PW1vbnRoLCB5PW1lYW5fbWVhbnRlbXApKSArCiAgZ2VvbV9jb2woKSArIGxhYnModGl0bGU9J1ByaWVtZXJuw6EgbWVhbiB0ZXBsb3RhIHBvZMS+YSBtZXNpYWNhJywgeD0nTWVzaWFjJywgeT0nUHJpZW1lciAowrBDKScpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgojIyAzLjMgWsOhdmlzbG9zdGkgbWVkemkgcHJlbWVubsO9bWkKYGBge3IgcGFpcnN9CmxpYnJhcnkoR0dhbGx5KQpudW1lcmljX2NvbHMgPC0gZGYgJT4lIHNlbGVjdF9pZihpcy5udW1lcmljKSAlPiUgc2VsZWN0KC15ZWFyKQpHR2FsbHk6OmdncGFpcnMobnVtZXJpY19jb2xzKQpgYGAKCiMgNC4gSmVkbm9kdWNow6kgbW9kZWxvdmFuaWUKCiMjIDQuMSBMaW5lw6FybnkgbW9kZWwg4oCUIHByZWRpa2NpYSBtZWFudGVtcCB6IHZsaGtvc3RpIGEgdmV0cmEKYGBge3IgbG0tbW9kZWx9CmxtMSA8LSBsbShtZWFudGVtcCB+IGh1bWlkaXR5ICsgd2luZF9zcGVlZCArIHByZXNzdXJlLCBkYXRhID0gZGYpCnN1bW1hcnkobG0xKQpwbG90KGxtMSwgd2hpY2g9MToyKQpgYGAKCiMjIDQuMiDEjGFzb3bDoSByYWRhIOKAlCBkZWtvbXBvesOtY2lhIG1lYW50ZW1wCmBgYHtyIHRzLWRlY29tcG9zZX0KIyBDcmVhdGUgdGltZSBzZXJpZXMgKGRhaWx5KS4gSWYgZGF0ZXMgYXJlIG5vdCBjb250aWd1b3VzLCB3ZSBzdGlsbCB1c2UgdHMgZm9yIHZpc3VhbGl6YXRpb24uCnRzX3RlbXAgPC0gdHMoZGYkbWVhbnRlbXAsIGZyZXF1ZW5jeSA9IDM2NSwgc3RhcnQgPSBjKG1pbihkZiR5ZWFyKSwgeWRheShtaW4oZGYkZGF0ZSkpKSkKIyBJZiB0aGUgc2VyaWVzIGxlbmd0aCBpcyBsZXNzIHRoYW4gYSBmdWxsIHllYXIsIGZyZXF1ZW5jeSBzdGlsbCBoZWxwcyB2aXN1YWxpemUgc2Vhc29uYWxpdHkuCmRlY29tcCA8LSBzdGwodHNfdGVtcCwgcy53aW5kb3c9J3BlcmlvZGljJykKcGxvdChkZWNvbXApCmBgYAoKIyA1LiBLcsOhdGtlIHrDoXZlcnkKYGBge3IgY29uY2x1c2lvbnMsIGVjaG89RkFMU0V9CmNhdCgnWmhybnV0aWU6IFxuLSBEYXRhc2V0IG9ic2FodWplJywgbnJvdyhkZiksICdyaWFka292LiBcbi0gVmlkaXRlxL5uw6kgc8O6IHNlesOzbm5lIHZ6b3J5IHYgdGVwbG90ZSBwb2TEvmEgbWVzaWFjb3YuIFxuLSBMaW5lw6FybnkgbW9kZWwgdWvDoXphbCAocG96cmkgc8O6aHJuKSwga3RvcsOpIHByZW1lbm7DqSBzw7ogc2lnbmlmaWthbnRuw6kgcHJlIHByZWRpa2NpdSBtZWFudGVtcC4nKQpgYGAKCiMgQm9udXM6IHVsb8W+ZW5pZSB1cHJhdmVuw6lobyBkYXRhc2V0dQpgYGB7ciBzYXZlfQp3cml0ZS5jc3YoZGYsICdEYWlseURlbGhp
Q2xpbWF0ZVRlc3RfcHJlcHJvY2Vzc2VkLmNzdicsIHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCi0tLQ==