1 What is R?

1.1 History

1980 from S (AT&T Bell Laboratories)
Created by Ross Ihaka and Robert Gentleman, University of Auckland, New Zealand
Currently developed by the R Development Core Team

1.2 For What?

Statistical computing
Data analysis
Graphics

2 Why is R?

TIOBE Index for March 2019 (https://www.tiobe.com/tiobe-index/)
Top Software for Analytics, Data Science, Machine Learning in 2018 (https://www.kdnuggets.com/2018/05/poll-tools-analytics-data-science-machine-learning-results.html)
FREE

3 Where is R?

4 Simple examples

4.1 apply

# Simulate an n x p matrix of independent standard-normal draws.
n <- 200
p <- 6
x <- matrix(rnorm(n * p), nrow = n, ncol = p)

# MARGIN = 2 applies the function to every column of x.
apply(x, MARGIN = 2, FUN = sd)    # column standard deviations
apply(x, MARGIN = 2, FUN = mean)  # column means
apply(x, MARGIN = 2, FUN = sort)  # each column sorted independently

4.2 ls

# Simulate a sparse linear model: only columns 1, 2 and 4 of x carry
# non-zero coefficients.
n <- 200
p <- 6
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
beta <- c(1, 2, 0, 3, 0, 0)
y <- x %*% beta + rnorm(n)

# Least-squares fit; the "- 1" term removes the intercept from the model.
fit <- lm(y ~ x - 1)
summary(fit)

## 
## Call:
## lm(formula = y ~ x - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.12770 -0.77127 -0.01196  0.56726  2.78710 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## x1  1.04006    0.07207  14.431   <2e-16 ***
## x2  2.03310    0.07661  26.537   <2e-16 ***
## x3  0.03523    0.07773   0.453    0.651    
## x4  3.01407    0.07668  39.309   <2e-16 ***
## x5  0.02877    0.07487   0.384    0.701    
## x6  0.03364    0.07682   0.438    0.662    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.063 on 194 degrees of freedom
## Multiple R-squared:  0.9289, Adjusted R-squared:  0.9267 
## F-statistic: 422.4 on 6 and 194 DF,  p-value: < 2.2e-16

## glmnet
library(glmnet)

# Cross-validated glmnet fit on the x and y simulated for the lm() example.
fit.glmnet <- cv.glmnet(
  x = x,
  y = y,
  family = "gaussian"
)
plot(fit.glmnet)

coef(fit.glmnet)

## 7 x 1 sparse Matrix of class "dgCMatrix"
##                        1
## (Intercept) -0.006773416
## V1           0.865268570
## V2           1.859444315
## V3           .          
## V4           2.806069225
## V5           .          
## V6           .

4.3 glm

# Simulate a binary response: y is TRUE with probability mu / (1 + mu),
# where mu = exp(beta0 + x %*% beta).
n <- 200
p <- 6
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
beta <- c(1, 2, 0, 3, 0, 0)
beta0 <- 1.5
mu <- exp(beta0 + x %*% beta)
u <- runif(n)
y <- u <= mu / (1 + mu)

# NOTE(review): y is a TRUE/FALSE outcome, yet the model is fitted with
# family = gaussian, while the cv.glmnet() fit on the same data uses
# "binomial". Confirm whether family = binomial was intended here; the
# printed output that follows was produced by the gaussian call.
fit <- glm(y ~ x, family = gaussian)
fit

## 
## Call:  glm(formula = y ~ x, family = gaussian)
## 
## Coefficients:
## (Intercept)           x1           x2           x3           x4  
##    0.673970     0.075858     0.168674    -0.049978     0.270509  
##          x5           x6  
##   -0.038466    -0.007128  
## 
## Degrees of Freedom: 199 Total (i.e. Null);  193 Residual
## Null Deviance:       43.88 
## Residual Deviance: 24.35     AIC: 162.4

## glmnet
# Cross-validated glmnet fit with a binomial family on the same x and y
# that were passed to glm().
fit.glmnet <- cv.glmnet(
  x = x,
  y = y,
  family = "binomial"
)
plot(fit.glmnet)

coef(fit.glmnet)

## 7 x 1 sparse Matrix of class "dgCMatrix"
##                      1
## (Intercept)  1.1018865
## V1           0.2996526
## V2           0.9561836
## V3          -0.1174316
## V4           1.6072234
## V5           .        
## V6           .

4.4 read.table

# Read the p-value table from a text file into a data frame.
pvalues1 <- read.table("pvaluesW6All.txt")

# Write the data frame back out as text. write.table() is the writing
# counterpart of read.table(); read.table() itself cannot write a file.
write.table(pvalues1, file = "test_rt.txt")

# save() stores the R object itself (R's serialized format, to be restored
# with load()), so the resulting file is not plain text despite its
# ".txt" name.
save(pvalues1, file = "test_save.txt")

# NOTE(review): `tab` and `filenames` are not defined in this snippet; they
# are presumably placeholders for a table and an output path - confirm.
# row.names = FALSE omits the row-name column from the CSV.
write.csv(tab, file = filenames, row.names = FALSE)

4.5 Community:

5 Visualization

5.1 Manhattan Plot

library(ggplot2)
source("ManhattanPlot.R")

pvalues1 <- read.table("pvaluesW6All.txt")

# Column 7 is the grouping column (it becomes "CHR" below); ng tabulates
# how many rows fall into each of its values.
ng <- ftable(pvalues1[, 7])
p <- nrow(pvalues1)

# Within-group position index: 1..ng[k] for each of the first 22 groups,
# concatenated in group order.
snp_posit <- unlist(lapply(1:22, function(k) seq(ng[k])))

par(mar = c(5, 8, 1, 1), oma = c(1, 1, 1, 1), mgp = c(3, 1.2, 0))
par(mfrow = c(2, 1))  # two stacked panels, one per p-value column

# Draw one Manhattan plot for p-value column 1 and one for column 3.
for (k in c(1, 3)) {
  pvalue <- data.frame(
    cbind(pvalues1[, 7], c(1:p), snp_posit, pvalues1[, k])
  )
  colnames(pvalue) <- c("CHR", "SNP", "BP", "P")
  manhattan(pvalue, pch = 20)
}

5.2 plot

set.seed(1)  # fixed seed so the simulated data are reproducible

# Simulate y = 2 * x + noise with standard-normal x and noise.
x <- rnorm(1000)
y <- 2 * x + rnorm(1000)
fit <- lm(y ~ x)

# Scatter plot of the data with the fitted regression line on top;
# coef(fit) supplies the intercept and slope to abline().
plot(x, y, main = "Linear regression", col = "gray")
abline(coef(fit))

5.3 ggvis plot

library(ggvis)

# Scatter plot of mpg against wt with a smoother on top. Two sliders are
# attached: one for the smoother's span and one for the point size.
mtcars %>%
  ggvis(~wt, ~mpg) %>%
  layer_smooths(
    span = input_slider(0.5, 1, value = 1, step = 0.1)
  ) %>%
  layer_points(
    size := input_slider(
      100, 1000,
      value = 100, ticks = FALSE,
      pre = "pre_", post = "_post"
    )
  )

## Warning: Can't output dynamic/interactive ggvis plots in a knitr document.
## Generating a static (non-dynamic, non-interactive) version of the plot.

5.4 animation plot

library(animation)
library(plyr)

# Format a p-value for the plot annotation: very small values are shown as
# an upper bound, everything else is rounded to three decimals. The branches
# are exhaustive, so every p-value (including those between .001 and .01)
# gets a label.
format_p_label <- function(p_value) {
  if (p_value < .0001) {
    " < .0001"
  } else if (p_value < .001) {
    " < .001"
  } else {
    round(p_value, 3)
  }
}

# Set the animation options, keeping the previous values so they can be
# restored after the loop.
oopt <- ani.options(interval = 0.3, nmax = 101)

a <- sort(rnorm(100, 2))
b <- sort(rnorm(100, 7))

n_frames <- ani.options("nmax")
# Jitter amount used in each frame: 0, 0.05, ..., 5 (101 values).
ji <- seq(from = 0, to = 5, by = .05)
out <- vector("list", n_frames)

for (i in seq_len(n_frames)) {
  # a is jittered cumulatively, so noise accumulates from frame to frame.
  a <- jitter(a, factor = 1, amount = ji[i])
  fab1 <- lm(a ~ b)

  # Summarise once and reuse: row 2 of the coefficient table is the slope
  # (column 3 = t value, column 4 = p-value).
  fit_summary <- summary(fab1)
  coe <- fit_summary$coefficients
  r2 <- fit_summary$r.squared
  p <- format_p_label(coe[2, 4])

  plot(a ~ b, main = "Linear model")
  abline(fab1, col = "red", lwd = 2)  # lwd is the line-width parameter
  text(
    x = min(b) + 2, y = max(a) - 1,
    labels = paste(
      "t = ", round(coe[2, 3], 3), ", p = ", p, ", R2 = ", round(r2, 3)
    )
  )

  # Keep t value, p-value and R-squared of this frame.
  out[[i]] <- c(coe[2, 3], coe[2, 4], r2)
  ani.pause()
}

# Restore the animation options that were in effect before this example.
ani.options(oopt)

5.5 3D plot (install XQuartz on MacOS)

# library(rgl)
# library(scatterplot3d)

# Upper halves of two origin-centred circles (a1 = a2 = 1):
# radius 3 for (x1, x2) and radius 4 for (x3, x4).
x1 <- seq(-3, 3, by = 0.1)
a1 <- 1
a2 <- 1
x2 <- sqrt((9 - a1 * x1^2) / a2)
x3 <- seq(-4, 4, by = 0.1)
x4 <- sqrt((16 - a1 * x3^2) / a2)
plot(x3, x4)
points(x1, x2)

# Stack upper and lower halves of both circles into one two-column matrix.
# Rows 1:122 hold the inner circle (2 x 61 points) and rows 123:284 the
# outer circle (2 x 81 points).
xy <- rbind(
  cbind(x1, x2), cbind(x1, -x2),
  cbind(x3, x4), cbind(x3, -x4)
)

# Outer circle in colour 2, inner circle in colour 3.
plot(xy[c(123:284), 1], xy[c(123:284), 2], col = 2, pch = 16)
points(xy[c(1:122), 1], xy[c(1:122), 2], col = 3, pch = 16)

# Map every point (x, y) to (x^2, y^2, sqrt(2) * x * y).
z1 <- xy[, 1]^2
z2 <- xy[, 2]^2
z3 <- sqrt(2) * xy[, 1] * xy[, 2]

# Static 3D scatter plot of the mapped points.
library(scatterplot3d)
scatterplot3d(z1, z2, z3, pch = 3)

# rgl version: inner circle first, then the outer circle added to the
# same scene.
library(rgl)
open3d()
plot3d(
  z1[c(1:122)], z2[c(1:122)], z3[c(1:122)],
  col = 3, size = 6
)
plot3d(
  z1[c(123:284)], z2[c(123:284)], z3[c(123:284)],
  col = 2, size = 6, add = TRUE
)

######
# install.packages("caTools")  # install external package
library(caTools)  # external package providing write.gif function

# Colour ramp used for the GIF frames.
jet.colors <- colorRampPalette(c(
  "red", "blue", "#007FFF", "cyan", "#7FFF7F",
  "yellow", "#FF7F00", "red", "#7F0000"
))

dx <- 1500  # width: number of columns
dy <- 1400  # height: number of rows

# Grid of complex numbers: real parts span [-2.2, 1.0] across the dx
# columns, imaginary parts span [-1.2, 1.2] down the dy rows.
re_part <- rep(seq(-2.2, 1.0, length.out = dx), each = dy)
im_part <- rep(seq(-1.2, 1.2, length.out = dy), dx)
C <- matrix(complex(real = re_part, imaginary = im_part), dy, dx)

Z <- 0                        # the iteration starts from zero
X <- array(0, c(dy, dx, 20))  # one dy x dx slice per iteration

for (k in 1:20) {
  Z <- Z^2 + C              # the recurrence z -> z^2 + c
  X[, , k] <- exp(-abs(Z))  # store this iteration as frame k
}

# Write the 20 slices as frames of an animated GIF.
write.gif(X, "Mandelbrot.gif", col = jet.colors, delay = 100)

5.6 More amazing plots:

See also: https://www.r-graph-gallery.com/

5.7 Website:

Example 1, factorial: https://xliusufe.shinyapps.io/factorial/

library(shiny)
library(FactSum)
# Application title
# headerPanel("Factorial of n")

# Inputs: whether the sum of factorials is wanted as well, and the value n.
inputPanel(
  selectInput(
    "is_sum",
    "Calculate sum 1!+2+..+n!?",
    choices = c("No", "Yes")
  ),
  numericInput("obs", "Nature number input:", 10)
)

# Translate the "No"/"Yes" selection into the 0/1 flag handed to fact().
sumInput <- reactive({
  switch(input$is_sum,
    "No" = 0,
    "Yes" = 1
  )
})

# Print the factorial summary and, when the flag is set, the sum summary.
renderPrint({
  res <- fact(input$obs, sumInput())
  cat("Length of factorial:", res$len_fact, "\n")
  cat("Number of zeros in the last:", res$nzeros, "\n")
  cat("Factorial of ", input$obs, ":\n")
  cat(res$fact)
  if (sumInput()) {
    cat("\n Length of Sum of factorials:\n", res$len_sum, "\n")
    cat("Sum of factorials", input$obs, ":\n")
    cat(res$fact_sum)
  }
})

Example 2, Plots

# Controls for the histogram bin count and the density bandwidth adjustment.
inputPanel(
  selectInput(
    "n_breaks",
    label = "Number of bins:",
    choices = c(10, 20, 35, 50),
    selected = 20
  ),
  sliderInput(
    "bw_adjust",
    label = "Bandwidth adjustment:",
    min = 0.2, max = 2, value = 1, step = 0.2
  )
)

# Histogram of the eruption durations with a kernel density estimate on top.
renderPlot({
  eruptions <- faithful$eruptions
  n_bins <- as.numeric(input$n_breaks)  # converted to numeric for breaks

  hist(
    eruptions,
    probability = TRUE,
    breaks = n_bins,
    xlab = "Duration (minutes)",
    main = "Geyser eruption duration"
  )

  dens <- density(eruptions, adjust = input$bw_adjust)
  lines(dens, col = "blue")
})

Example 3, table

# Embed the "06_tabsets" example application located inside the shiny
# package, at full width and 550 high.
shinyAppDir(
  appDir = system.file("examples/06_tabsets", package = "shiny"),
  options = list(width = "100%", height = 550)
)

5.8 What else?

Animation
SVM: e1071
Optimization: DEoptim, quadprog, linprog, CVXR, RcppDE
Parallel: doParallel, Rmpi, snow, multicore, RcppParallel
Rmarkdown
Deep learning: tf & keras
Workflowr
Distributed computation: sparklyr, SparkR

6 R package

6.1 How many are they?

More than 15,000 (as of Sep. 2018)

6.2 Where are they?

Cran: https://cran.r-project.org/web/packages/
Github: https://github.com
Bioconductor: https://www.bioconductor.org/
Omegahat: http://www.omegahat.net/

6.3 How to use them?

  # "rpk" stands in for a package name; the second argument of
  # install.packages() is the library directory to install into.
  install.packages("rpk", lib = dir)
  library(rpk)

6.4 Can I unzip them?

Yes, you can unzip them and
repack them

7 Developing R packages

7.1 How to develop them?

Rstudio
R command-line:

    # Create a skeleton source-package directory named "myrpk".
    package.skeleton("myrpk")

7.2 How to pack them?

cmd (windows) or terminal (MacOS):

    # Build the source package from the myrpk directory.
    R CMD build myrpk
    # Generate the PDF reference manual from the package's Rd files.
    R CMD Rd2pdf myrpk
    # Run the standard package checks.
    R CMD check myrpk

8 Example: FactSum

8.1 Summary

The R package "FactSum" calculates the factorial of a large integer, whose value may far exceed the range of any built-in numeric data type. FactSum is dramatically fast. It takes only 0.45 seconds to compute 10000! (approximately 2.8E+35660), and 0.98 seconds to compute 10000! and sum=1!+2!+3!+...+10000! simultaneously. It takes only one minute to compute 100000! (approximately 2.8E+456574), and less than two minutes to compute 100000! and sum=1!+2!+3!+...+100000! simultaneously.

8.2 Installation

    # One-time setup: uncomment the next line if devtools is not installed.
    #install.packages("devtools")
    library(devtools)
    # Install FactSum from the GitHub repository xliusufe/FactSum.
    install_github("xliusufe/FactSum")

8.3 Usage

FactSum-manual.pdf ———- Details of the usage of the package.
factorial.md ———————- The factorial of n=20, 50, 100, 1000, 10000, and 100000.
Web-based calculator ——— A web-based calculator

8.4 Example

library(FactSum)
# Compute 200!; passing 1 as the second argument also returns the sum of
# factorials (the fact_sum and len_sum elements of the result).
fact(200,1)

## $fact
## [1] "788657867364790503552363213932185062295135977687173263294742533244359449963403342920304284011984623904177212138919638830257642790242637105061926624952829931113462857270763317237396988943922445621451664240254033291864131227428294853277524242407573903240321257405579568660226031904170324062351700858796178922222789623703897374720000000000000000000000000000000000000000000000000"
## 
## $len_fact
## [1] 375
## 
## $fact_sum
## [1] "792621072814943158937574954417696054502273470568077747007887743862285047941581535541498718312275086275679893343076013862361579680670972527976009279036348551929550827607601145450876014530359530008733947699274904382825445634899233107885545828612637824213482759975963581961375904743254634250508637523339809985946128242523687347261107994804323593105039052556442336528920420940313"
## 
## $len_sum
## [1] 375
## 
## $nzeros
## [1] 49

8.5 Development

This R package is developed by Xu Liu (liu.xu@sufe.edu.cn).

8.6 See also sqrtn

9 External languages

9.1 How to call C/C++ or FORTRAN

Rtools
Interface to compile code (For C/C++)

   .C
   .Call
   .External

foo.dll (Windows) or foo.so (MacOS):

     # Compile foo.c into a shared library (foo.dll or foo.so).
     R CMD SHLIB foo.c

compile in Rpk

Talky Talky R

Xu Liu

1 What is R?

1.1 History

1.2 For What?

2 Why is R?

3 Where is R?

4 Simple examples

4.1 apply

4.2 ls

4.3 glm

4.4 read.table

4.5 Community:

5 Visualization

5.1 Manhattan Plot

5.2 plot

5.3 ggvis plot

5.4 animation plot

5.5 3D plot (install XQuartz on MacOS)

5.6 More amazing plots:

5.7 Website:

5.8 What else?

6 R package

6.1 How many are they?

6.2 Where are they?

6.3 How to use them?

6.4 Can I unzip them?

7 Developing R packages

7.1 How to develop them?

7.2 How to pack them?

8 Example: FactSum

8.1 Summary

8.2 Installation

8.3 Usage

8.4 Example

8.5 Development

8.6 See also sqrtn

9 External languages

9.1 How to call C/C++ or FORTRAN

9.2 How to use Rcpp?

Talky Talky R

Xu Liu

1 What is R?

1.1 History

1.2 For What?

2 Why is R?

3 Where is R?

4 Simple examples

4.1 apply

4.2 ls

4.3 glm

4.4 read.table

4.5 Community:

5 Visualization

5.1 Manhattan Plot

5.2 plot

5.3 ggvis plot

5.4 animation plot

5.5 3D plot (install XQuartz on MacOS)

5.6 More amazing plots:

5.7 Website:

5.8 What else?

6 R package

6.1 How many are they?

6.2 Where are they?

6.3 How to use them?

6.4 Can I unzip them?

7 Developing R packages

7.1 How to develop them?

7.2 How to pack them?

7.3 Where and How to share them?

8 Example: FactSum

8.1 Summary

8.2 Installation

8.3 Usage

8.4 Example

8.5 Development

8.6 See also sqrtn

9 External languages

9.1 How to call C/C++ or FORTRAN

9.2 How to use Rcpp?