Ch 3 : The Basics

A computer language is described in terms of its syntax and semantics. Syntax is the grammar of the language, and semantics is the meaning behind a sentence.

3.2 Understand the Console

There are 4 windows (panes) in the RStudio console.

Script files

Can open many files at a time
Saves script
Allows code and comments

Console/ Command Line

Where output is displayed

Workspace Environment

Holds objects
Can review history

Misc-Displays

Shows files in working directory
Shows plots
Shows which packages are installed on the computer and which ones are loaded
Help files/search

Get and set working directory

getwd()

## [1] "C:/Users/jmortada/Desktop/Data Wrangling with R"

setwd('C:/Users/jmortada/Desktop/Data Wrangling with R')

List all objects


ls()

Identify whether an object with a given name is present


exists('a')

Remove an object or a few objects

a <- c(1,2,3)

rm('a')

Remove all objects in working environment

a <- c(1,2,3)
b <- c(4,5)


rm(list = ls())

View history of last 25 commands


history()   # Note if you do history(56), you can see last 56 commands.
            #.. If you do history(Inf), you see entire saved history

Save an image


save.image()

Save a list of objects

object1 <- c('hi','bye')
object2 <- c(2,3)
object3 <- c(TRUE, TRUE, TRUE, FALSE)

save(object1,object2,object3, file = 'all_objects.Rda')  # or .RData instead of Rda
                                                         # will save objects in working directory

Load saved objects

load('all_objects.Rda') # Make sure you are in the right working directory

Understanding the command options

help(options) # Allow the user to set and examine a variety of global options which affect the way in which R computes and displays its results.

## starting httpd help server ... done

options(digits = 3)   # Changing the specific option for the number of digits to print on output. Making it 3 here.

Getting Help

help.start()   # provides general help links

## If nothing happens, you should open
## 'http://127.0.0.1:16079/doc/html/index.html' yourself

help.search('time series')    # Help on a topic
 
help(glm)   # Help on a function name; could also use ?glm

example(glm)  # Gives an example using the function

## 
## glm> ## Dobson (1990) Page 93: Randomized Controlled Trial :
## glm> counts <- c(18,17,15,20,10,20,25,13,12)
## 
## glm> outcome <- gl(3,1,9)
## 
## glm> treatment <- gl(3,3)
## 
## glm> print(d.AD <- data.frame(treatment, outcome, counts))
##   treatment outcome counts
## 1         1       1     18
## 2         1       2     17
## 3         1       3     15
## 4         2       1     20
## 5         2       2     10
## 6         2       3     20
## 7         3       1     25
## 8         3       2     13
## 9         3       3     12
## 
## glm> glm.D93 <- glm(counts ~ outcome + treatment, family = poisson())
## 
## glm> anova(glm.D93)
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: counts
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                          8      10.58
## outcome    2     5.45         6       5.13
## treatment  2     0.00         4       5.13
## 
## glm> ## No test: 
## glm> ##D summary(glm.D93)
## glm> ## End(No test)
## glm> ## Computing AIC [in many ways]:
## glm> (A0 <- AIC(glm.D93))
## [1] 56.8
## 
## glm> (ll <- logLik(glm.D93))
## 'log Lik.' -23.4 (df=5)
## 
## glm> A1 <- -2*c(ll) + 2*attr(ll, "df")
## 
## glm> A2 <- glm.D93$family$aic(counts, mu=fitted(glm.D93), wt=1) +
## glm+         2 * length(coef(glm.D93))
## 
## glm> stopifnot(exprs = {
## glm+   all.equal(A0, A1)
## glm+   all.equal(A1, A2)
## glm+   all.equal(A1, glm.D93$aic)
## glm+ })
## 
## glm> ## No test: 
## glm> ##D ## an example with offsets from Venables & Ripley (2002, p.189)
## glm> ##D utils::data(anorexia, package = "MASS")
## glm> ##D 
## glm> ##D anorex.1 <- glm(Postwt ~ Prewt + Treat + offset(Prewt),
## glm> ##D                 family = gaussian, data = anorexia)
## glm> ##D summary(anorex.1)
## glm> ## End(No test)
## glm> 
## glm> # A Gamma example, from McCullagh & Nelder (1989, pp. 300-2)
## glm> clotting <- data.frame(
## glm+     u = c(5,10,15,20,30,40,60,80,100),
## glm+     lot1 = c(118,58,42,35,27,25,21,19,18),
## glm+     lot2 = c(69,35,26,21,18,16,13,12,12))
## 
## glm> summary(glm(lot1 ~ log(u), data = clotting, family = Gamma))
## 
## Call:
## glm(formula = lot1 ~ log(u), family = Gamma, data = clotting)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.0401  -0.0376  -0.0264   0.0290   0.0864  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.016554   0.000928   -17.9  4.3e-07 ***
## log(u)       0.015343   0.000415    37.0  2.8e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.00245)
## 
##     Null deviance: 3.51283  on 8  degrees of freedom
## Residual deviance: 0.01673  on 7  degrees of freedom
## AIC: 37.99
## 
## Number of Fisher Scoring iterations: 3
## 
## 
## glm> summary(glm(lot2 ~ log(u), data = clotting, family = Gamma))
## 
## Call:
## glm(formula = lot2 ~ log(u), family = Gamma, data = clotting)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.0557  -0.0293   0.0103   0.0171   0.0637  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.023908   0.001326   -18.0  4.0e-07 ***
## log(u)       0.023599   0.000577    40.9  1.4e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.00181)
## 
##     Null deviance: 3.118557  on 8  degrees of freedom
## Residual deviance: 0.012672  on 7  degrees of freedom
## AIC: 27.03
## 
## Number of Fisher Scoring iterations: 3
## 
## 
## glm> ## Aliased ("S"ingular) -> 1 NA coefficient
## glm> (fS <- glm(lot2 ~ log(u) + log(u^2), data = clotting, family = Gamma))
## 
## Call:  glm(formula = lot2 ~ log(u) + log(u^2), family = Gamma, data = clotting)
## 
## Coefficients:
## (Intercept)       log(u)     log(u^2)  
##     -0.0239       0.0236           NA  
## 
## Degrees of Freedom: 8 Total (i.e. Null);  7 Residual
## Null Deviance:       3.12 
## Residual Deviance: 0.0127    AIC: 27
## 
## glm> tools::assertError(update(fS, singular.ok=FALSE), verbose=interactive())
## 
## glm> ## -> .. "singular fit encountered"
## glm> 
## glm> ## Not run: 
## glm> ##D ## for an example of the use of a terms object as a formula
## glm> ##D demo(glm.vr)
## glm> ## End(Not run)
## glm> 
## glm>

Getting help from web

RSiteSearch('key phrase')
Stack Overflow: http://stackoverflow.com/questions/tagged/r
Cross validated: http://stats.stackexchange.com/questions/tagged/r
R-seek: http://rseek.org/
R-bloggers: http://www.r-bloggers.com

Install packages from CRAN

Note: you can also install packages from Bioconductor using BiocManager::install() (older Bioconductor releases used biocLite(), which is now deprecated)

# Use install.packages('forecast')  
# use install.packages(c('forecast','dplyr'))

Download GitHub packages

# Use install.packages('devtools') # devtools provides a simple function to download GitHub packages

# Use devtools::install_github('username/packagename')

Load packages


# Use library(packagename)

Use a function without loading a package (the package must already be installed)


# Use tidyr::gather(functionarguments)

Getting help with packages

help(package = 'tidyr')

See all packages installed


library()

See packages currently loaded


search()

View specific vignette

vignette(package = 'dplyr') # Lists all vignettes for a specific package

vignette('two-table') # View specific vignette (two-table is in dplyr) or vignette('programming')

vignette()     # Shows all vignettes on the computer

Assignments and Evaluation


# Three ways to bind a value to a name. Typing a bare name evaluates it and
# prints its value.
x <- 3  # `<-` is the standard assignment operator

x  # evaluates x and prints its value

y = 4  # `=` also performs assignment when used at the top level

z <<- 5  # `<<-` searches enclosing environments for an existing z and, if none is found, assigns in the global environment; it is normally used inside functions

z

R as calculator

2 + 1 / 4 ^ 2

## [1] 2.06

2 + (3 / 4) ^ 2

## [1] 2.56

(4+ 3) / 3 ^ 2   # By default R displays 7 digits but we can change this using options()

## [1] 0.778

options(digits = 3)  # Changing number of digits displayed to 3 

99999 * 99999   # will be displayed in scientific notation

## [1] 1e+10

options(digits = 7)  # Note that largest number of digits that can be displayed is 22

Undefined calculations

1/0

## [1] Inf

Inf - Inf

## [1] NaN

-1/0

## [1] -Inf

0/0

## [1] NaN

sqrt(-5)

## Warning in sqrt(-5): NaNs produced

## [1] NaN

Integer Division and modulo

42/4   # regular division

## [1] 10.5

42 %/% 4 # integer division

## [1] 10

42 %% 4 # modulo (remainder)

## [1] 2

Vectorization

# Demonstrates vectorized addition and the equivalent explicit for loop.
x <- c(1, 2, 3)
y <- c(2, 4, 6)

x + y  # element-wise sum; the result is a vector of the same length as x and y

## [1] 3 6 9

# Another way to do this addition is through a for loop. In some languages
# other than R you can't do x + y directly and have to write the loop,
# so R makes it easy.


# Adding x and y using the for loop method

# Preallocate the result vector at its final length instead of growing it
# by one element on every iteration.

z <- numeric(length(x))

for (i in seq_along(x)) {
  z[i] <- x[i] + y[i]
  print(z[seq_len(i)])  # show only the elements filled in so far
}

## [1] 3
## [1] 3 6
## [1] 3 6 9