Make sure you install R and RStudio by our next session. I will make subsequent sessions more hands-on.
Download and install R for Windows (DIRECT LINK)
http://watson.nci.nih.gov/cran_mirror/bin/windows/base/R-2.15.1-win.exe
Download and install R for Mac (DIRECT LINK; Install both files)
http://watson.nci.nih.gov/cran_mirror/bin/macosx/R-2.15.1.pkg
http://watson.nci.nih.gov/cran_mirror/bin/macosx/tools/tcltk-8.5.5-x11.dmg
RStudio (Watch screencast, then download RStudio Desktop for Mac or Win)
http://www.rstudio.org
Making sense of abbreviations in R
http://www.r-bloggers.com/abbreviations-of-r-commands-explained-250-r-abbreviations/
UCLA Statistical Computing: Data Analysis Examples
https://www.ats.ucla.edu/stat/dae/
R-podcast
http://www.r-podcast.org
Quick R
http://www.statmethods.net
HMS Computational Statistics for Biomedical Science
http://informaticstraining.hms.harvard.edu/content/lectures-and-problem-sets
R courses at HSPH
http://bcb.dfci.harvard.edu/%7Eaedin/
Introductory Statistics with R (ISwR)
This book is especially good because the author is a _bio_statistician. Most other beginners' R books are written by non-bio statisticians so the topics covered are not as relevant.
http://www.amazon.com/Introductory-Statistics-R-Computing/dp/0387790535/ref=sr_1_6?ie=UTF8&qid=1348259885&sr=8-6&keywords=r
Pagano
http://www.thomsonedu.com/statistics/discipline_content/dataLibrary.html
Rosner
http://www.cengage.com/cgi-wadsworth/course_products_wp.pl?fid=M20bI&product_isbn_issn=9780538733496
Applied Regression Analysis and Other Multivariable Methods, 4th Edition
http://www.brookscole.com/cgi-wadsworth/course_products_wp.pl?fid=M20b&flag=student&product_isbn_issn=9780495384960&disciplinenumber=1038&template=AUS
Applied Logistic Regression
http://www.umass.edu/statdata/statdata/data/index.html
Modelling Survival Data in Medical Research, Second Edition
http://www.crcpress.com/product/isbn/9781584883258
Applied Longitudinal Analysis, 2ndEdition
http://www.biostat.harvard.edu/~fitzmaur/ala2e/
## Read a .csv file named data.csv in HOME/statistics/ISwR/ into a new object data.object1
data.object1 <- read.csv("~/statistics/ISwR/data.csv")
## Read a .csv file of your choice: file.choose() helps you choose a file, then that file is passed to read.csv()
data.object2 <- read.csv(file.choose())
### Introduction to R
## On 2012-09-21 @HSPH
## By Kazuki Yoshida
## Define function that installs a package if not already installed
inst <- function (PKG) {
if (!(PKG %in% rownames(installed.packages()))) {
install.packages(pkgs = PKG, dependencies = TRUE)
}
}
#################### Dad ####################
inst("rgl") ; library(rgl)
open3d(windowRect=c(0, 0, 600, 600))
clear3d("all")
light3d()
material3d(shininess=100, specular="black")
## Head
radius <- function(d) {
pchisq(d^2, 3)
}
shade3d(ellipse3d(diag(3), level=radius(1),
centre=c(0, 0, 1)),
color="yellow")
## Neck
## logo is 100x76
png("rlogoExtended.png",
width=500, height=250)
library(grid)
grid.rect(gp=gpar(col=NA, fill="yellow"))
library(png)
rlogo <- as.raster(readPNG(system.file("extra", "Rlogo.png",
package="RGraphics")))
rlogo[rlogo == "##FFFFFF"] <- "yellow"
grid.raster(rlogo, x=.6, y=.01, width=.08, just=c("bottom"))
dev.off()
shade3d(cylinder3d(cbind(0, 0, c(-1.4, 1)),
e1=cbind(0, 0, 1),
e2=cbind(1, 0, 0),
sides=100),
color="yellow",
texture="rlogoExtended.png",
texcoords=list(x=rep(seq(1, 0, length.out=101), each=4)[-c(1:2, 403:404)],
y=rep(c(0, 1, 1, 0), 50)))
old <- function() {
shade3d(cylinder3d(cbind(0, 0, c(-1.3, 1)),
e1=cbind(0, 0, 1),
e2=cbind(1, 0, 0),
sides=100),
color="yellow")
}
## Eyes
eyeball <- ellipse3d(diag(3), level=radius(.4))
shade3d(translate3d(eyeball, .8, .35, .7),
color="white")
shade3d(translate3d(eyeball, .8, -.35, .7),
color="white")
## Translate radius of eye, rotate, translate position of eye
pupil <- rotate3d(translate3d(ellipse3d(diag(3),
level=radius(.05)),
.4, 0, 0),
30/180*pi, 0, 0, 1)
shade3d(translate3d(pupil, .8, .35, .7),
color="black")
shade3d(translate3d(pupil, .8, -.35, .7),
color="black")
## points3d(1.21, c(-.35, .35), .7, cex=3)
## Nose
shade3d(cylinder3d(cbind(c(1, 1.3), 0, .3),
radius=.2,
e1=cbind(1, 0, 0),
e2=cbind(0, 1, 0),
sides=100),
color="yellow")
shade3d(ellipse3d(diag(3), level=radius(.2),
centre=c(1.3, 0, .3)),
color="yellow")
## Mouth
shade3d(ellipse3d(diag(3), level=radius(.8),
centre=c(.6, 0, -.5)),
color="tan")
angle <- seq(-65, 65, length=30)/180*pi
lines3d(.6 + .81*cos(angle), .81*sin(angle), -.5, lwd=3)
## Hair on top
angle <- seq(15, 165, length=30)/180*pi
lines3d(.2, .7*cos(angle), 1.5 + .7*sin(angle), lwd=3)
lines3d(-.2, .7*cos(angle), 1.5 + .7*sin(angle), lwd=3)
## Hair on sides
lines3d(seq(.5, -.5, length=5), -1, rep(c(.3, .8), length=5), lwd=3)
lines3d(seq(.5, -.5, length=5), 1, rep(c(.3, .8), length=5), lwd=3)
par3d(userMatrix=rotationMatrix(-pi/2, 1, 0, 0)%*%
rotationMatrix(80/180*pi, 0, 0, 1)%*%
rotationMatrix(10/180*pi, 1, 0, 0))
#################### Regression plane ####################
## Load Ginzberg dataset ?Ginzberg for help
inst("car") ; library(car)
data(Ginzberg)
## Predict depression score from simplicity and fatality score
lm.ginz <- lm(data = Ginzberg,
adjdep ~ adjsimp + adjfatal)
ginz.fun.no.int <- function(X, Y) {
predict(lm.ginz, newdata = data.frame(adjsimp = X, adjfatal = Y))
}
## Predict depression score from simplicity and fatality score (interaction included)
lm.ginz.int <- lm(data = Ginzberg,
adjdep ~ adjsimp + adjfatal + adjsimp:adjfatal)
ginz.fun.int <- function(X, Y) {
predict(lm.ginz.int, newdata = data.frame(adjsimp = X, adjfatal = Y))
}
## Create X and Y axes
axis.simplicity <- seq(0, 3.0, length = 50)
axis.fatality <- seq(0, 2.5, length = 50)
## Create Z axis values using previously defined prediction models
axis.dep.no.int <- outer(axis.simplicity, axis.fatality, ginz.fun.no.int)
axis.dep.int <- outer(axis.simplicity, axis.fatality, ginz.fun.int)
## Load rgl for 3D modeling
inst("rgl") ; library(rgl)
## Plot 3D scatter plot
with(Ginzberg, {
plot3d(x = adjsimp,
y = adjfatal,
z = adjdep,
type = "s", # s_phere
col = "red",
size = 2
)
})
## Add regression plane from no interaction model
surface3d(x = axis.simplicity,
y = axis.fatality,
z = axis.dep.no.int,
col = 3, alpha = 0.2)
## Add curved regression plane from interaction model
surface3d(x = axis.simplicity,
y = axis.fatality,
z = axis.dep.int,
col = 3, alpha = 0.7)
#################### Gapminder ####################
## Install and load packages
inst("datamart") ; library(datamart)
inst("ggplot2") ; library(ggplot2)
inst("reshape2") ; library(reshape2)
inst("plyr") ; library(plyr)
inst("animation") ; library(animation)
## Establish connection to Gapminder data
gm <- gapminder()
## Define a wrapper function used to extract typical datasets
gapminder2df <- function(gm, data.name) {
require(reshape2)
data.object <- query(gm, data.name)
df.data.object <- data.frame(data.object)
names(df.data.object) <- names(data.object)
df.data.object$date <- as.Date(rownames(df.data.object))
df.data.object.melt <-
melt(data = df.data.object,
id.var = c("date"),
variable.name = "country",
value.name = paste(data.name))
df.data.object.melt
}
## Query for data available
queries(gm)
## These share a common structure: time ~ country (values in cells)
TotalFertilityRate <- gapminder2df(gm, "TotalFertilityRate")
IncomePerCapita <- gapminder2df(gm, "IncomePerCapita")
Population <- gapminder2df(gm, "Population")
## MainReligion has only two columns: Entity and Group
MainReligion <- data.frame(query(gm, "MainReligion"))
names(MainReligion) <- c("country","MainReligion")
MainReligion[MainReligion$MainReligion == "", "MainReligion"] <- "other"
MainReligion$MainReligion <- factor(MainReligion$MainReligion)
## Reference
## http://stackoverflow.com/questions/8091303/merge-multiple-data-frames-in-a-list-simultaneously
## left reduce computes l_1 = f(v_1, v_2), l_2 = f(l_1, v_3), etc., and returns l_{n-1} = f(l_{n-2}, v_n)
list.of.data.frames <- list(TotalFertilityRate = TotalFertilityRate,
IncomePerCapita = IncomePerCapita,
Population = Population)
df.merged <- Reduce(function(...) merge(..., all = TRUE), list.of.data.frames)
## Left joint: df.merged <- MainReligion
df.merged <- merge(x = df.merged, y = MainReligion,
all.x = TRUE, all.y = FALSE)
## Use only complete cases (inner join is another option)
df.merged.complete <- df.merged[complete.cases(df.merged),]
countries.of.interestn <- c("Japan","United Kingdom","USSR","Russia","China","United States","India","Iran","Saudi Arabia","Qatar","Korea, Rep.","Korea, Dem. Rep.","Brunei","Kuwait","Norway","Luxembourg","United Arab Emirates","Korea, United","Iraq","Sweden")
df.merged.complete$count.int <- as.character("")
df.merged.complete$count.int[df.merged.complete$country %in% countries.of.interestn] <-
as.character(df.merged.complete$country[df.merged.complete$country %in% countries.of.interestn])
ani.start()
d_ply(.data = subset(df.merged.complete, date %in% as.Date(paste(seq(1901, 2011, 5), "-01-01", sep = ""))),
.variables = "date",
function(single.year) {
gg.graph <-
ggplot(single.year) +
geom_point(aes(x = IncomePerCapita, y = TotalFertilityRate, group = MainReligion, color = country),
alpha = 2/3) +
geom_text(aes(x = IncomePerCapita, y = TotalFertilityRate, label = count.int),
hjust = -0.1, vjust = 0.5, angle = 45, size = 3) +
theme_bw() +
scale_x_continuous(limit = c(0,120000)) + scale_y_continuous(limit = c(0, 10)) +
facet_wrap(~ MainReligion, drop = FALSE) +
opts(title = single.year[1,"date"], legend.position = "none")
print(gg.graph)
})
ani.stop()