xn8 — Oct 29, 2013, 2:27 PM
################# RStudio ###################
# Familiarize yourself with the 4 RStudio panels and menus
# Create a new RStudio project from a new directory
# http://www.haikudeck.com/p/ZP8kSBdzcT
################## Know your DATA #####################
# Data types
# Numeric scalar: arithmetic operators combined with a function call
aNumber <- (3 * 5 + sqrt(4)) / 12
aNumber
[1] 1.417
aCharacter <- "4"
aCharacter
[1] "4"
aLogical <- TRUE
aLogical
[1] TRUE
# Simple data structures
aLogicalVector <- c(TRUE, FALSE, FALSE, FALSE)
aLogicalVector
[1] TRUE FALSE FALSE FALSE
aCharacterVector <- c("ShuChi", "Juan", "Mary")
aCharacterVector
[1] "ShuChi" "Juan" "Mary"
aFactorVariable <- as.factor(aCharacterVector)
aFactorVariable
[1] ShuChi Juan Mary
Levels: Juan Mary ShuChi
# Generating data & sampling
# seq_len(31) builds the integers 1..31 directly; wrapping 1:31 in seq()
# was redundant
aNumericVector <- seq_len(31)
# Repeat the sequence 1..7 four times (28 values in total)
anotherNumericVector <- rep(1:7, times = 4)
# Draw 5 distinct values at random (no replacement); the result varies per run
aSample <- sample(aNumericVector, size = 5)
aSample
[1] 8 23 13 30 27
# Indexing
anotherNumericVector[22] # Index to pull one element of vector
[1] 1
# 31 values cannot fill a 5 x 7 matrix (35 cells), so R recycles values from the
# start of the vector into the remaining cells and emits the warning shown below
aNumericMatrix <- matrix(aNumericVector, ncol=7, byrow=TRUE)
Warning: data length [31] is not a sub-multiple or multiple of the number
of rows [5]
dim(aNumericMatrix)
[1] 5 7
# Complex data structures
# A list can hold vectors of different types and lengths
alist <- list(c("a", "b", "c"), c(1,2,3,4), c(TRUE, FALSE))
# Indexing a vector or list
aNumericVector[1] # Index to pull one element of vector
[1] 1
aNumber[1] # This singular value can be indexed as it is considered a vector of length 1
[1] 1.417
aNumericMatrix[3,4] # Index to pull one element of matrix
[1] 18
alist[[2]][2] # Index to pull the 2nd element of the 2nd vector
[1] 2
# Data Frames
# Two parallel vectors, one entry per employee
salary <- c(21000, 23400, 26800)
startdate <- as.Date(c("2013-10-1", "2008-3-25", "2007-3-14"))
# Combine the vectors column-wise into a data frame
aDataframe <- data.frame(salary, startdate)
# Append a third column taken from the character vector defined earlier
aDataframe[["employee"]] <- aCharacterVector
aDataframe
salary startdate employee
1 21000 2013-10-01 ShuChi
2 23400 2008-03-25 Juan
3 26800 2007-03-14 Mary
# Indexing a dataframe
aDataframe[2,3] # Indexing a dataframe element
[1] "Juan"
aDataframe[2,] # Indexing a dataframe row
salary startdate employee
2 23400 2008-03-25 Juan
aDataframe[,2] # Indexing a dataframe column
[1] "2013-10-01" "2008-03-25" "2007-03-14"
aDataframe[,2:3] # Indexing selected dataframe columns
startdate employee
1 2013-10-01 ShuChi
2 2008-03-25 Juan
3 2007-03-14 Mary
# Reading in Data
# Load a data frame that ships with R
data("mtcars")
# R documentation for data usually includes examples of usage
?mtcars
# Family Planning Effort tab-delimited file from web
fpe <- read.table("http://web.pop.psu.edu/~spicer/effort.dat", header = TRUE)
################## TOOLS and TECHNIQUES #####################
# Conditionals
# 3% increase for employees who started before 2010-09-23, no change otherwise
aDataframe$pctincrease <- ifelse(
  aDataframe$startdate < as.Date("2010-09-23"),
  1.03,
  1.0
)
# Implied iteration (vectorization): the product is computed for every row
aDataframe$newsalary <- aDataframe$salary * aDataframe$pctincrease
# Built in functions for data manipulation, analysis and output
table (fpe$effort)
0 3 4 6 7 9 13 14 15 16 19 21 23
4 2 1 1 2 1 1 1 2 2 1 1 1
# Bin effort scores into three categories: (-1,4], (4,14] and (14,100].
# The argument is spelled out as `labels`; `label` only worked through
# partial argument matching.
fpe$effort.factor <- cut(
  fpe$effort,
  breaks = c(-1, 4, 14, 100),
  labels = c("weak", "moderate", "strong")
)
table(fpe$effort.factor)
weak moderate strong
7 6 7
# On the command line, use Tab to complete the cut function command
# Documentation on the function describes arguments and value returned
?cut
# Store results of analysis functions in an object to reuse all or part of the results
?lm
# NOTE(review): the formula names its columns as fpe$...; that is why every
# term label in the output below carries the fpe$ prefix. Passing data = fpe
# with bare column names is the usual form -- consider for a future revision.
lmfit<-lm(fpe$change ~ fpe$setting + fpe$effort)
# The fitted model is an object of class "lm"
class(lmfit)
[1] "lm"
str(lmfit)
List of 12
$ coefficients : Named num [1:3] -14.451 0.271 0.968
..- attr(*, "names")= chr [1:3] "(Intercept)" "fpe$setting" "fpe$effort"
$ residuals : Named num [1:20] 3 4.43 3.89 3.13 0.4 ...
..- attr(*, "names")= chr [1:20] "1" "2" "3" "4" ...
$ effects : Named num [1:20] -63.95 -34.66 -27.48 1.62 -1.74 ...
..- attr(*, "names")= chr [1:20] "(Intercept)" "fpe$setting" "fpe$effort" "" ...
$ rank : int 3
$ fitted.values: Named num [1:20] -2 5.57 25.11 21.87 28.6 ...
..- attr(*, "names")= chr [1:20] "1" "2" "3" "4" ...
$ assign : int [1:3] 0 1 2
$ qr :List of 5
..$ qr : num [1:20, 1:3] -4.472 0.224 0.224 0.224 0.224 ...
.. ..- attr(*, "dimnames")=List of 2
.. .. ..$ : chr [1:20] "1" "2" "3" "4" ...
.. .. ..$ : chr [1:3] "(Intercept)" "fpe$setting" "fpe$effort"
.. ..- attr(*, "assign")= int [1:3] 0 1 2
..$ qraux: num [1:3] 1.22 1.1 1.2
..$ pivot: int [1:3] 1 2 3
..$ tol : num 1e-07
..$ rank : int 3
..- attr(*, "class")= chr "qr"
$ df.residual : int 17
$ xlevels : Named list()
$ call : language lm(formula = fpe$change ~ fpe$setting + fpe$effort)
$ terms :Classes 'terms', 'formula' length 3 fpe$change ~ fpe$setting + fpe$effort
.. ..- attr(*, "variables")= language list(fpe$change, fpe$setting, fpe$effort)
.. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
.. .. .. ..$ : chr [1:2] "fpe$setting" "fpe$effort"
.. ..- attr(*, "term.labels")= chr [1:2] "fpe$setting" "fpe$effort"
.. ..- attr(*, "order")= int [1:2] 1 1
.. ..- attr(*, "intercept")= int 1
.. ..- attr(*, "response")= int 1
.. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. ..- attr(*, "predvars")= language list(fpe$change, fpe$setting, fpe$effort)
.. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
.. .. ..- attr(*, "names")= chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
$ model :'data.frame': 20 obs. of 3 variables:
..$ fpe$change : int [1:20] 1 10 29 25 29 40 21 0 13 4 ...
..$ fpe$setting: int [1:20] 46 74 89 77 84 89 68 70 60 55 ...
..$ fpe$effort : int [1:20] 0 0 16 16 21 15 14 6 13 9 ...
..- attr(*, "terms")=Classes 'terms', 'formula' length 3 fpe$change ~ fpe$setting + fpe$effort
.. .. ..- attr(*, "variables")= language list(fpe$change, fpe$setting, fpe$effort)
.. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
.. .. .. ..- attr(*, "dimnames")=List of 2
.. .. .. .. ..$ : chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
.. .. .. .. ..$ : chr [1:2] "fpe$setting" "fpe$effort"
.. .. ..- attr(*, "term.labels")= chr [1:2] "fpe$setting" "fpe$effort"
.. .. ..- attr(*, "order")= int [1:2] 1 1
.. .. ..- attr(*, "intercept")= int 1
.. .. ..- attr(*, "response")= int 1
.. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
.. .. ..- attr(*, "predvars")= language list(fpe$change, fpe$setting, fpe$effort)
.. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
.. .. .. ..- attr(*, "names")= chr [1:3] "fpe$change" "fpe$setting" "fpe$effort"
- attr(*, "class")= chr "lm"
methods(class="lm")
[1] add1.lm* alias.lm* anova.lm
[4] case.names.lm* confint.lm* cooks.distance.lm*
[7] deviance.lm* dfbeta.lm* dfbetas.lm*
[10] drop1.lm* dummy.coef.lm* effects.lm*
[13] extractAIC.lm* family.lm* formula.lm*
[16] hatvalues.lm influence.lm* kappa.lm
[19] labels.lm* logLik.lm* model.frame.lm
[22] model.matrix.lm nobs.lm* plot.lm
[25] predict.lm print.lm proj.lm*
[28] qr.lm* residuals.lm rstandard.lm
[31] rstudent.lm simulate.lm* summary.lm
[34] variable.names.lm* vcov.lm*
Non-visible functions are asterisked
# Try some of the methods
summary(lmfit)
Call:
lm(formula = fpe$change ~ fpe$setting + fpe$effort)
Residuals:
Min 1Q Median 3Q Max
-10.348 -3.643 0.638 3.225 15.853
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -14.451 7.094 -2.04 0.05752 .
fpe$setting 0.271 0.108 2.51 0.02263 *
fpe$effort 0.968 0.225 4.30 0.00048 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.39 on 17 degrees of freedom
Multiple R-squared: 0.738, Adjusted R-squared: 0.707
F-statistic: 24 on 2 and 17 DF, p-value: 1.13e-05
hatvalues(lmfit)
1 2 3 4 5 6 7 8 9
0.20764 0.17505 0.11756 0.08944 0.17107 0.11296 0.09033 0.06240 0.13165
10 11 12 13 14 15 16 17 18
0.12817 0.34990 0.15277 0.21720 0.15849 0.14436 0.13354 0.11172 0.16851
19 20
0.08825 0.18899
# R will dummy-code factor variables for you, using the first factor level
# (here "weak") as the reference category
covfit<-lm(fpe$change ~ fpe$setting + fpe$effort.factor)
# Output to screen or file
# plot() on an lm object draws diagnostic plots for the fitted model
plot (lmfit)
# Use Export menu in RStudio plot window to save the plot
# Save data with new variables as R data
save(fpe, file = "fpe.Rdata")
# Save your R script and create a Notebook
# install.packages("knitr")
# library() stops with an error if knitr is missing; require() would only
# return FALSE and let the script carry on without the package
library(knitr)
# Check the package documentation
demo(package = "knitr")
vignette(package = "knitr") # Look for a link to the vignette in the package Description
data(package = "knitr")
no data sets found
# You can request a free RPubs account to publish your results
################## Summary #####################
# You should be comfortable with...
# - Creating RStudio Projects
# - Locating help and reading R documentation for classes, functions & data
# - Keyboard shortcuts: command completion with tab, up-arrow, etc
# - Workspace and history
# - Editing an R script
# - Creating an RStudio Notebook