R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Pipe

# Collapse gapminder to one row per continent/country pair; the remaining
# columns are packed into a `data` list-column of per-group tibbles.
gapminder %>%    
  group_by(continent,country) %>%
  # Each pipe passes its left-hand result as the first argument of the next
  # call, so this is the same as nest(group_by(gapminder, continent, country)).
  nest()
## # A tibble: 142 x 3
##    continent country     data             
##    <fct>     <fct>       <list>           
##  1 Asia      Afghanistan <tibble [12 x 4]>
##  2 Europe    Albania     <tibble [12 x 4]>
##  3 Africa    Algeria     <tibble [12 x 4]>
##  4 Africa    Angola      <tibble [12 x 4]>
##  5 Americas  Argentina   <tibble [12 x 4]>
##  6 Oceania   Australia   <tibble [12 x 4]>
##  7 Europe    Austria     <tibble [12 x 4]>
##  8 Asia      Bahrain     <tibble [12 x 4]>
##  9 Asia      Bangladesh  <tibble [12 x 4]>
## 10 Europe    Belgium     <tibble [12 x 4]>
## # ... with 132 more rows

Including Plots

Function map in purrr

# Reduce every column of mtcars to a single number: once with the mean,
# once with the median.
means <- mtcars %>% map_dbl(mean)
medians <- mtcars %>% map_dbl(median)

# Distribution of the per-column means.
summary(means)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.4062   3.0149   3.6875  39.6085  18.9697 230.7219
summary(medians)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.663   4.000  34.112  18.455 196.300
# Start from an empty vector, so length(x) is 0.
x <- c()

# seq_along() returns an empty index vector for empty input, so a loop over
# it would run zero times.
seq_along(x)
## integer(0)
# 1:length(x) evaluates to 1:0, which counts DOWN and yields two indices
# (1 and 0) instead of none. Prefer seq_along(x) / seq_len(n) whenever the
# input may be empty.
1:length(x)
## [1] 1 0

Model

Nested vs. Unnested

#source("gapminder-shiny.R")

colnames(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
#model <- lm(country ~ year, data = gapminder)

#models <- models %>%
#  mutate(
#    glance = model %>% map(broom::glance),
#    rsq = glance %>% map_dbl("r.squared"),
#    tidy = model %>% map(broom::tidy),
#    augment = model %>% map(broom::augment)
 # )
#models
# Regress hp on every other column of mtcars (`hp ~ .`) and print the summary.
# NOTE(review): the data frame is passed first and the formula second, which
# is not the argument order of stats::lm(). The echoed call
# `stats::lm(formula = formula, data = data)` suggests a data-first wrapper
# (presumably twidlr's lm) is attached -- confirm before reusing this line.
fit <- lm(mtcars, hp ~.)
summary(fit)
## 
## Call:
## stats::lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.681 -15.558   0.799  18.106  34.718 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  79.0484   184.5041   0.428  0.67270   
## mpg          -2.0631     2.0906  -0.987  0.33496   
## cyl           8.2037    10.0861   0.813  0.42513   
## disp          0.4390     0.1492   2.942  0.00778 **
## drat         -4.6185    16.0829  -0.287  0.77680   
## wt          -27.6600    19.2704  -1.435  0.16591   
## qsec         -1.7844     7.3639  -0.242  0.81089   
## vs           25.8129    19.8512   1.300  0.20758   
## am            9.4863    20.7599   0.457  0.65240   
## gear          7.2164    14.6160   0.494  0.62662   
## carb         18.7487     7.0288   2.667  0.01441 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.97 on 21 degrees of freedom
## Multiple R-squared:  0.9028, Adjusted R-squared:  0.8565 
## F-statistic:  19.5 on 10 and 21 DF,  p-value: 1.898e-08
library(dplyr)

# Fit hp against all other mtcars columns and print the model summary,
# one pipeline step per line (the pipe stays at the END of each line).
mtcars %>%
  lm(hp ~ .) %>%
  summary()
## 
## Call:
## stats::lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.681 -15.558   0.799  18.106  34.718 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  79.0484   184.5041   0.428  0.67270   
## mpg          -2.0631     2.0906  -0.987  0.33496   
## cyl           8.2037    10.0861   0.813  0.42513   
## disp          0.4390     0.1492   2.942  0.00778 **
## drat         -4.6185    16.0829  -0.287  0.77680   
## wt          -27.6600    19.2704  -1.435  0.16591   
## qsec         -1.7844     7.3639  -0.242  0.81089   
## vs           25.8129    19.8512   1.300  0.20758   
## am            9.4863    20.7599   0.457  0.65240   
## gear          7.2164    14.6160   0.494  0.62662   
## carb         18.7487     7.0288   2.667  0.01441 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.97 on 21 degrees of freedom
## Multiple R-squared:  0.9028, Adjusted R-squared:  0.8565 
## F-statistic:  19.5 on 10 and 21 DF,  p-value: 1.898e-08
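# Why is there an error when the pipeline is written like this?
#    mtcars %>% lm(hp ~ .)
#           %>% summary()
# Because R parses `mtcars %>% lm(hp ~ .)` as a complete expression at the end
# of its line; the next line then starts with `%>%`, which has no left-hand
# side and is a syntax error. Keep the pipe at the END of a line so R knows
# the expression continues.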

Step 2: From fitted model to tidy results

# Refit the model through the pipe and keep the fitted object for tidying.
fit <-
  mtcars %>%
  lm(hp ~ .)

# One-row, model-level summary of the fit.
fit %>% glance()
##   r.squared adj.r.squared    sigma statistic     p.value df    logLik
## 1 0.9027993     0.8565132 25.97138  19.50477 1.89833e-08 11 -142.8905
##        AIC      BIC deviance df.residual
## 1 309.7809 327.3697 14164.76          21
tidy(fit)
##           term    estimate   std.error  statistic     p.value
## 1  (Intercept)  79.0483879 184.5040756  0.4284371 0.672695339
## 2          mpg  -2.0630545   2.0905650 -0.9868407 0.334955314
## 3          cyl   8.2037204  10.0861425  0.8133655 0.425134929
## 4         disp   0.4390024   0.1492007  2.9423609 0.007779725
## 5         drat  -4.6185488  16.0829171 -0.2871711 0.776795845
## 6           wt -27.6600472  19.2703681 -1.4353668 0.165910518
## 7         qsec  -1.7843654   7.3639133 -0.2423121 0.810889101
## 8           vs  25.8128774  19.8512410  1.3003156 0.207583411
## 9           am   9.4862914  20.7599371  0.4569518 0.652397317
## 10        gear   7.2164047  14.6160152  0.4937327 0.626619355
## 11        carb  18.7486691   7.0287674  2.6674192 0.014412403
augment(fit) %>% head()
##           .rownames  hp  mpg cyl disp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 110 21.0   6  160 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 110 21.0   6  160 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710  93 22.8   4  108 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 110 21.4   6  258 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 175 18.7   8  360 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 105 18.1   6  225 2.76 3.460 20.22  1  0    3    1
##     .fitted     .resid      .hat   .sigma     .cooksd .std.resid
## 1 148.68122 -38.681220 0.2142214 24.75946 0.069964902 -1.6801773
## 2 140.62866 -30.628664 0.2323739 25.43881 0.049861042 -1.3460408
## 3  79.99158  13.008418 0.3075987 26.38216 0.014633059  0.6019364
## 4 125.75448 -15.754483 0.2103960 26.31579 0.011288712 -0.6826601
## 5 183.21756  -8.217565 0.2016137 26.53317 0.002878707 -0.3541128
## 6 111.38490  -6.384902 0.3147448 26.55680 0.003682813 -0.2969840

A single, tidy pipeline

# Fit and summarise in one chain: the model object is never stored, only its
# one-row glance() summary is printed.
mtcars %>%
  lm(hp ~ .) %>%
  glance()
##   r.squared adj.r.squared    sigma statistic     p.value df    logLik
## 1 0.9027993     0.8565132 25.97138  19.50477 1.89833e-08 11 -142.8905
##        AIC      BIC deviance df.residual
## 1 309.7809 327.3697 14164.76          21
# Cluster the four numeric iris measurements into 3 groups and print one row
# per cluster (centre coordinates x1..x4, size, within-cluster sum of squares).
# NOTE: kmeans() starts from randomly chosen centres, so cluster numbering and
# even the sizes can differ between runs unless a seed is set beforehand.
iris %>%
  select(-Species) %>%
  kmeans(centers = 3) %>%
  tidy()
##         x1       x2       x3       x4 size withinss cluster
## 1 5.901613 2.748387 4.393548 1.433871   62 39.82097       1
## 2 6.850000 3.073684 5.742105 2.071053   38 23.87947       2
## 3 5.006000 3.428000 1.462000 0.246000   50 15.15100       3

A ridge regression with cross-fold validation

# dependency
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loaded glmnet 2.0-16
## 
## Attaching package: 'glmnet'
## The following objects are masked from 'package:twidlr':
## 
##     cv.glmnet, glmnet, predict.cv.glmnet, predict.glmnet
#mtcars %>%
#  cv.glmnet(am ~ ., alpha = 0) %>%
#  glance()