Rexample.R

xn8 — Oct 29, 2013, 2:27 PM

################# RStudio ###################
# Familiarize yourself with the 4 RStudio panels and menus
# Create a new RStudio project from a new directory

# http://www.haikudeck.com/p/ZP8kSBdzcT
################## Know your DATA #####################

# Data types
# Each object below holds a single value of one of R's basic types:
# numeric, character and logical.

# Numeric: ordinary arithmetic; sqrt() is the built-in square root
aNumber <- ((3*5) + sqrt(4))/12
aNumber
[1] 1.417
# Character: the quotes make this the text "4", not the number 4
aCharacter <- "4"
aCharacter
[1] "4"
# Logical: a TRUE/FALSE value
aLogical <- TRUE
aLogical
[1] TRUE

# Simple data structures
# c() combines individual values into a vector

aLogicalVector <- c(TRUE, FALSE, FALSE, FALSE)
aLogicalVector
[1]  TRUE FALSE FALSE FALSE

aCharacterVector <- c("ShuChi", "Juan", "Mary")
aCharacterVector
[1] "ShuChi" "Juan"   "Mary"  

# as.factor() converts the character vector to a factor; as the output
# shows, the levels are listed alphabetically, not in order of appearance
aFactorVariable <- as.factor(aCharacterVector)
aFactorVariable
[1] ShuChi Juan   Mary  
Levels: Juan Mary ShuChi

# Generating data & sampling
# seq_len(31) gives the integers 1..31 directly; wrapping an existing
# sequence in seq(), as in seq(1:31), is redundant.
aNumericVector <- seq_len(31)
# rep() repeats the whole sequence 1..7 four times (28 values)
anotherNumericVector <- rep(1:7, 4)
# sample() draws 5 values at random without replacement, so the result
# differs from run to run unless set.seed() is called first
aSample <- sample(aNumericVector, 5)
aSample
[1]  8 23 13 30 27

# Indexing
# Square brackets select elements by position
anotherNumericVector[22] # Index to pull one element of vector
[1] 1

# Fill a 7-column matrix row by row. The 31 values do not divide evenly
# into rows of 7, which is what the warning below reports; the result
# still has 5 rows and 7 columns.
aNumericMatrix <- matrix(aNumericVector, ncol=7, byrow=TRUE)
Warning: data length [31] is not a sub-multiple or multiple of the number
of rows [5]
dim(aNumericMatrix)
[1] 5 7

# Complex data structures
# A list can hold vectors of different types and lengths

alist <- list(c("a", "b", "c"), c(1,2,3,4), c(TRUE, FALSE))

# Indexing a vector or list
aNumericVector[1]  # Index to pull one element of vector
[1] 1
aNumber[1]  # This single value can be indexed because it is treated as a vector of length 1
[1] 1.417
aNumericMatrix[3,4]  # Index to pull the element in row 3, column 4 of the matrix
[1] 18
alist[[2]][2]  # [[2]] extracts the 2nd vector from the list; [2] then pulls its 2nd element
[1] 2

# Data Frames
# Build a data frame from two vectors of equal length; each vector
# becomes a column named after the variable
salary <- c(21000, 23400, 26800)
# as.Date() converts the date strings to Date values
startdate <- as.Date(c('2013-10-1','2008-3-25','2007-3-14'))
aDataframe <- data.frame(salary,startdate)

# Add another column to the dataframe from a character vector
aDataframe$employee<-aCharacterVector
aDataframe
  salary  startdate employee
1  21000 2013-10-01   ShuChi
2  23400 2008-03-25     Juan
3  26800 2007-03-14     Mary

# Indexing a dataframe: the form is [row, column]
aDataframe[2,3] # Row 2, column 3: a single element
[1] "Juan"
aDataframe[2,] # Column left blank: the whole of row 2
  salary  startdate employee
2  23400 2008-03-25     Juan
aDataframe[,2] # Row left blank: the whole of column 2, returned as a vector
[1] "2013-10-01" "2008-03-25" "2007-03-14"
aDataframe[,2:3] # A range of columns: returned as a smaller dataframe
   startdate employee
1 2013-10-01   ShuChi
2 2008-03-25     Juan
3 2007-03-14     Mary


# Reading in data
# Load the built-in mtcars dataframe into the workspace
data("mtcars")

# Open the R documentation for the dataset; such pages usually include
# examples of usage
help("mtcars")

# Family Planning Effort tab-delimited file from the web; the first line
# of the file supplies the column names
fpe <- read.table(
  "http://web.pop.psu.edu/~spicer/effort.dat",
  header = TRUE
)

################## TOOLS and TECHNIQUES #####################

# Conditionals
# ifelse() tests every start date at once: rows that started before
# 2010-09-23 get a factor of 1.03, all other rows get 1.0
aDataframe$pctincrease <- ifelse(
  aDataframe$startdate < as.Date("2010-09-23"),
  1.03,
  1.0
)

# Implied iteration (vectorization)
# Multiplying two columns works element by element, with no explicit loop;
# salary is the first column of the dataframe
aDataframe$newsalary <- aDataframe$salary * aDataframe$pctincrease

# Built in functions for data manipulation, analysis and output

table (fpe$effort)

 0  3  4  6  7  9 13 14 15 16 19 21 23 
 4  2  1  1  2  1  1  1  2  2  1  1  1 
# Bin effort into three groups: (-1, 4] weak, (4, 14] moderate,
# (14, 100] strong. The argument is spelled out as `labels` rather than
# relying on partial matching of `label`.
fpe$effort.factor <- cut(
  fpe$effort,
  breaks = c(-1, 4, 14, 100),
  labels = c("weak", "moderate", "strong")
)
table (fpe$effort.factor)

    weak moderate   strong 
       7        6        7 


# On the command line, use Tab to complete a function name such as cut
# Documentation on the function describes arguments and value returned
?cut

# Store results of analysis functions in an object to reuse all or part of the results
# Open the help page for lm()
?lm
# Regress change on setting and effort and keep the fitted model in lmfit.
# NOTE(review): because the columns are written as fpe$..., the term names in
# the output carry the "fpe$" prefix; lm(change ~ setting + effort, data = fpe)
# would presumably give cleaner labels - confirm before changing.
lmfit<-lm(fpe$change ~ fpe$setting + fpe$effort)
# The fitted model is an object of class "lm"
class(lmfit)
[1] "lm"
str(lmfit)
List of 12
 $ coefficients : Named num [1:3] -14.451 0.271 0.968
  ..- attr(*, "names")= chr [1:3] "(Intercept)" "fpe$setting" "fpe$effort"
 $ residuals    : Named num [1:20] 3 4.43 3.89 3.13 0.4 ...
  ..- attr(*, "names")= chr [1:20] "1" "2" "3" "4" ...
 $ effects      : Named num [1:20] -63.95 -34.66 -27.48 1.62 -1.74 ...
  ..- attr(*, "names")= chr [1:20] "(Intercept)" "fpe$setting" "fpe$effort" "" ...
 $ rank         : int 3
 $ fitted.values: Named num [1:20] -2 5.57 25.11 21.87 28.6 ...
  ..- attr(*, "names")= chr [1:20] "1" "2" "3" "4" ...
 $ assign       : int [1:3] 0 1 2
 $ qr           :List of 5
  ..$ qr   : num [1:20, 1:3] -4.472 0.224 0.224 0.224 0.224 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:20] "1" "2" "3" "4" ...
  .. .. ..$ : chr [1:3] "(Intercept)" "fpe$setting" "fpe$effort"
  .. ..- attr(*, "assign")= int [1:3] 0 1 2
  ..$ qraux: num [1:3] 1.22 1.1 1.2
  ..$ pivot: int [1:3] 1 2 3
  ..$ tol  : num 1e-07
  ..$ rank : int 3
  ..- attr(*, "class")= chr "qr"
 $ df.residual  : int 17
 $ xlevels      : Named list()
 $ call         : language lm(formula = fpe$change ~ fpe$setting + fpe$effort)
 $ terms        :Classes 'terms', 'formula' length 3 fpe$change ~ fpe$setting + fpe$effort
  .. ..- attr(*, "variables")= language list(fpe$change, fpe$setting, fpe$effort)
  .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
  .. .. .. ..$ : chr [1:2] "fpe$setting" "fpe$effort"
  .. ..- attr(*, "term.labels")= chr [1:2] "fpe$setting" "fpe$effort"
  .. ..- attr(*, "order")= int [1:2] 1 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. ..- attr(*, "predvars")= language list(fpe$change, fpe$setting, fpe$effort)
  .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
  .. .. ..- attr(*, "names")= chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
 $ model        :'data.frame':  20 obs. of  3 variables:
  ..$ fpe$change : int [1:20] 1 10 29 25 29 40 21 0 13 4 ...
  ..$ fpe$setting: int [1:20] 46 74 89 77 84 89 68 70 60 55 ...
  ..$ fpe$effort : int [1:20] 0 0 16 16 21 15 14 6 13 9 ...
  ..- attr(*, "terms")=Classes 'terms', 'formula' length 3 fpe$change ~ fpe$setting + fpe$effort
  .. .. ..- attr(*, "variables")= language list(fpe$change, fpe$setting, fpe$effort)
  .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
  .. .. .. .. ..$ : chr [1:2] "fpe$setting" "fpe$effort"
  .. .. ..- attr(*, "term.labels")= chr [1:2] "fpe$setting" "fpe$effort"
  .. .. ..- attr(*, "order")= int [1:2] 1 1
  .. .. ..- attr(*, "intercept")= int 1
  .. .. ..- attr(*, "response")= int 1
  .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
  .. .. ..- attr(*, "predvars")= language list(fpe$change, fpe$setting, fpe$effort)
  .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
  .. .. .. ..- attr(*, "names")= chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
 - attr(*, "class")= chr "lm"
methods(class="lm")
 [1] add1.lm*           alias.lm*          anova.lm          
 [4] case.names.lm*     confint.lm*        cooks.distance.lm*
 [7] deviance.lm*       dfbeta.lm*         dfbetas.lm*       
[10] drop1.lm*          dummy.coef.lm*     effects.lm*       
[13] extractAIC.lm*     family.lm*         formula.lm*       
[16] hatvalues.lm       influence.lm*      kappa.lm          
[19] labels.lm*         logLik.lm*         model.frame.lm    
[22] model.matrix.lm    nobs.lm*           plot.lm           
[25] predict.lm         print.lm           proj.lm*          
[28] qr.lm*             residuals.lm       rstandard.lm      
[31] rstudent.lm        simulate.lm*       summary.lm        
[34] variable.names.lm* vcov.lm*          

   Non-visible functions are asterisked
# Try some of the methods
summary(lmfit)

Call:
lm(formula = fpe$change ~ fpe$setting + fpe$effort)

Residuals:
    Min      1Q  Median      3Q     Max 
-10.348  -3.643   0.638   3.225  15.853 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -14.451      7.094   -2.04  0.05752 .  
fpe$setting    0.271      0.108    2.51  0.02263 *  
fpe$effort     0.968      0.225    4.30  0.00048 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.39 on 17 degrees of freedom
Multiple R-squared:  0.738, Adjusted R-squared:  0.707 
F-statistic:   24 on 2 and 17 DF,  p-value: 1.13e-05
hatvalues(lmfit)
      1       2       3       4       5       6       7       8       9 
0.20764 0.17505 0.11756 0.08944 0.17107 0.11296 0.09033 0.06240 0.13165 
     10      11      12      13      14      15      16      17      18 
0.12817 0.34990 0.15277 0.21720 0.15849 0.14436 0.13354 0.11172 0.16851 
     19      20 
0.08825 0.18899 

# R automatically creates dummy (indicator) variables for a factor, using its first level as the reference category
covfit<-lm(fpe$change ~ fpe$setting + fpe$effort.factor)

# Output to screen or file
plot (lmfit)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Use Export menu in RStudio plot window to save the plot

# Save data with new variables as R data
# The file is written to the current working directory and can be
# restored in a later session with load("fpe.Rdata")
save(fpe, file = "fpe.Rdata")

# Save your R script and create a Notebook
# install.packages("knitr")
# library() stops with an error if knitr is not installed, whereas
# require() would only warn and return FALSE, letting the script carry on
library(knitr)
# Check the package documentation
demo(package = "knitr")
vignette(package = "knitr") # Look for a link to the vignette in the package Description
data(package = "knitr")
no data sets found
# You can request a free RPubs account to publish your results

################## Summary #####################  

# You should be comfortable with...

# - Creating RStudio Projects
# - Locating help and reading R documentation for classes, functions & data
# - Keyboard shortcuts: command completion with tab, up-arrow, etc
# - Workspace and history
# - Editing an R script 
# - Creating an RStudio Notebook