map2.R

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 3.4.4

## -- Attaching packages ---------------------------------------------- tidyverse 1.2.1 --

## √ ggplot2 3.1.0     √ purrr   0.2.5
## √ tibble  1.4.2     √ dplyr   0.7.8
## √ tidyr   0.8.1     √ stringr 1.3.1
## √ readr   1.1.1     √ forcats 0.3.0

## Warning: package 'ggplot2' was built under R version 3.4.4

## Warning: package 'tibble' was built under R version 3.4.4

## Warning: package 'tidyr' was built under R version 3.4.4

## Warning: package 'readr' was built under R version 3.4.4

## Warning: package 'purrr' was built under R version 3.4.4

## Warning: package 'dplyr' was built under R version 3.4.4

## Warning: package 'stringr' was built under R version 3.4.4

## Warning: package 'forcats' was built under R version 3.4.4

## -- Conflicts ------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(stats)
# reproducible results
set.seed(47)

# shuffle / randomise rows
mtcars2 <- mtcars %>% sample_frac(1)

# split train / test
mtcars_train <- mtcars2[1:20, ] %>%
  rownames_to_column() %>%
  as.tibble()

mtcars_test <- mtcars2[21:32, ] %>%
  rownames_to_column() %>%
  as.tibble()
mtcars_test

## # A tibble: 12 x 12
##    rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear
##    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Datsun~  22.8     4 108      93  3.85  2.32  18.6     1     1     4
##  2 Merc 4~  17.3     8 276.    180  3.07  3.73  17.6     0     0     3
##  3 Valiant  18.1     6 225     105  2.76  3.46  20.2     1     0     3
##  4 Mazda ~  21       6 160     110  3.9   2.88  17.0     0     1     4
##  5 Fiat 1~  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4
##  6 Porsch~  26       4 120.     91  4.43  2.14  16.7     0     1     5
##  7 Ford P~  15.8     8 351     264  4.22  3.17  14.5     0     1     5
##  8 Merc 2~  24.4     4 147.     62  3.69  3.19  20       1     0     4
##  9 Fiat X~  27.3     4  79      66  4.08  1.94  18.9     1     1     4
## 10 Toyota~  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4
## 11 Merc 2~  22.8     4 141.     95  3.92  3.15  22.9     1     0     4
## 12 Hornet~  18.7     8 360     175  3.15  3.44  17.0     0     0     3
## # ... with 1 more variable: carb <dbl>

mtcars_model_spline <- function(df) {
  with(df, smooth.spline(x = mpg, y = disp, df = 3))
}

# for each cyl group create subsets and fit the models of interest using map
dt_models <- as.tibble(mtcars_train) %>%
  mutate(cyl = if_else(cyl < 7, 4, cyl)) %>%
  nest(-cyl) %>% # print() %>%
  rename(data_train = data) %>%
  mutate(
    # model1 = map(data_train, ~approxfun(x = .$disp, y = .$mpg)),
    model2 = map(data_train, ~lm(disp ~ mpg, data = .)),
    model3 = map(data_train, mtcars_model_spline),
    model31 = map(data_train, ~smooth.spline(x = .$mpg, y = .$disp, df = 3))
    
  )

## Warning: package 'bindrcpp' was built under R version 3.4.4

# rename(data_train = data) %>%
# print()
mtcars_test

## # A tibble: 12 x 12
##    rowname   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear
##    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Datsun~  22.8     4 108      93  3.85  2.32  18.6     1     1     4
##  2 Merc 4~  17.3     8 276.    180  3.07  3.73  17.6     0     0     3
##  3 Valiant  18.1     6 225     105  2.76  3.46  20.2     1     0     3
##  4 Mazda ~  21       6 160     110  3.9   2.88  17.0     0     1     4
##  5 Fiat 1~  32.4     4  78.7    66  4.08  2.2   19.5     1     1     4
##  6 Porsch~  26       4 120.     91  4.43  2.14  16.7     0     1     5
##  7 Ford P~  15.8     8 351     264  4.22  3.17  14.5     0     1     5
##  8 Merc 2~  24.4     4 147.     62  3.69  3.19  20       1     0     4
##  9 Fiat X~  27.3     4  79      66  4.08  1.94  18.9     1     1     4
## 10 Toyota~  33.9     4  71.1    65  4.22  1.84  19.9     1     1     4
## 11 Merc 2~  22.8     4 141.     95  3.92  3.15  22.9     1     0     4
## 12 Hornet~  18.7     8 360     175  3.15  3.44  17.0     0     0     3
## # ... with 1 more variable: carb <dbl>

# join test data to be able to predict them
dt_test_data <- mtcars_test %>%
  nest(-cyl) %>%
  inner_join(dt_models, by = "cyl") %>%
  rename(data_test = data) %>%
  mutate(
    pred.rt1 = map2(data_train, data_test, function(train, test) {
      approx(x = train$mpg, y = train$hp, xout = test$mpg)$y
    }),
    
    pred.rt2 = map2(model2, data_test, function(px, py) {
      predict(px, py)
    }),
    pred.rt21 = map2(model2, data_test, ~predict(.x, .y)),
    
    pred.rt3 = map2(model3, data_test, function(px, py) {
      predict(px, py$mpg)$y
    }),
    pred.rt31 = map2(model31, data_test,~predict(.x, .y$mpg)$y) 
    
  ) %>%
  #select(data_train) %>%
  #unnest() %>%
  print()

## # A tibble: 2 x 11
##     cyl data_test data_train model2 model3 model31 pred.rt1 pred.rt2
##   <dbl> <list>    <list>     <list> <list> <list>  <list>   <list>  
## 1     4 <tibble ~ <tibble [~ <S3: ~ <S3: ~ <S3: s~ <dbl [7~ <dbl [7~
## 2     8 <tibble ~ <tibble [~ <S3: ~ <S3: ~ <S3: s~ <dbl [3~ <dbl [3~
## # ... with 3 more variables: pred.rt21 <list>, pred.rt3 <list>,
## #   pred.rt31 <list>

dt_test_data$pred.rt2

## [[1]]
##         1         2         3         4         5         6         7 
## 143.67034  75.40274 120.91448 132.29241 111.66991  64.73593 143.67034 
## 
## [[2]]
##        1        2        3 
## 316.2009 339.5750 294.3851

dt_test_data$pred.rt21

## [[1]]
##         1         2         3         4         5         6         7 
## 143.67034  75.40274 120.91448 132.29241 111.66991  64.73593 143.67034 
## 
## [[2]]
##        1        2        3 
## 316.2009 339.5750 294.3851

dt_test_data$pred.rt3

## [[1]]
## [1] 156.03740  63.42927 132.80349 146.11966 120.10918  46.44018 156.03740
## 
## [[2]]
## [1] 327.0194 321.6170 351.1309

dt_test_data$pred.rt31

## [[1]]
## [1] 156.03740  63.42927 132.80349 146.11966 120.10918  46.44018 156.03740
## 
## [[2]]
## [1] 327.0194 321.6170 351.1309

# predict test data using map2
pred.rt1 <- map2(dt_test_data$data_train, dt_test_data$data_test, function(train, test) {
  approx(x = train$mpg, y = train$hp, xout = test$mpg)$y
}) %>% print()

## [[1]]
## [1] 94.88202       NA 89.66854 92.27528 87.55056       NA 94.88202
## 
## [[2]]
## [1] 178.3929 160.0000 175.8929

pred.rt2 <- map2(dt_test_data$model2, dt_test_data$data_test, function(x, y) {
  predict(x, y)
}) %>% print()

## [[1]]
##         1         2         3         4         5         6         7 
## 143.67034  75.40274 120.91448 132.29241 111.66991  64.73593 143.67034 
## 
## [[2]]
##        1        2        3 
## 316.2009 339.5750 294.3851

pred.rt3 <- map2(  dt_test_data$model3, dt_test_data$data_test,  function(x, y) {
    predict(x, y$mpg)$y
  }
) %>% print()

## [[1]]
## [1] 156.03740  63.42927 132.80349 146.11966 120.10918  46.44018 156.03740
## 
## [[2]]
## [1] 327.0194 321.6170 351.1309

map2.R

besuk

Sun Feb 17 01:03:40 2019