Introduction
Getting data into R
Accessing variables and managing subsets
An introduction to basic plotting tools
Graphing tools
An introduction to the Lattice Package
Common R mistakes
# Open the help page for boxplot() with the ? shortcut.
?boxplot
## starting httpd help server ... done
# help() is the function-call form of the same request.
help(boxplot)
# Boxplot of count grouped by spray, using the InsectSprays data set.
boxplot(count~spray,data=InsectSprays,col="lightgray")
# Look up the documentation of the InsectSprays data set itself.
?InsectSprays
# R as a calculator: exponentiation binds tighter than addition, so use
# parentheses to change the order of evaluation.
2 + 2
## [1] 4
2 + 2^2
## [1] 6
(2 + 2)^2
## [1] 16
sqrt(2)
## [1] 1.414214
log(2)
## [1] 0.6931472
# Store values in variables; `<-` is used for every assignment.
x <- 5
y <- 10
z <- x + y
z
## [1] 15
# Sequence from 1 to 5 in steps of 0.5.
seq(1, 5, by = 0.5)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
# c() combines values into a vector; arithmetic on vectors is elementwise.
v1 <- c(6, 5, 4, 3, 2, 1)
v1
## [1] 6 5 4 3 2 1
v2 <- c(10, 9, 8, 7, 6, 5)
v3 <- v1 + v2
v3
## [1] 16 14 12 10 8 6
# Summary statistics of the combined vector.
max(v3); min(v3)
## [1] 16
## [1] 6
length(v3)
## [1] 6
mean(v3)
## [1] 11
sd(v3)
## [1] 3.741657
A package is a collection of previously programmed functions, often including functions for specific tasks.
# Make sure MASS is available: install it only when it cannot be found.
if (isFALSE(requireNamespace("MASS", quietly = TRUE))) {
  install.packages("MASS")
}
# Attach MASS to the search path.
library("MASS")
# Show how to cite R itself.
citation()
##
## To cite R in publications use:
##
## R Core Team (2023). R: A language and environment for statistical
## computing. R Foundation for Statistical Computing, Vienna, Austria.
## URL https://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2023},
## url = {https://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please cite it
## when using it for data analysis. See also 'citation("pkgname")' for
## citing R packages.
citation("lattice")
##
## To cite the lattice package in publications use:
##
## Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with
## R. Springer, New York. ISBN 978-0-387-75968-5
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## title = {Lattice: Multivariate Data Visualization with R},
## author = {Deepayan Sarkar},
## publisher = {Springer},
## address = {New York},
## year = {2008},
## note = {ISBN 978-0-387-75968-5},
## url = {http://lmdvr.r-forge.r-project.org},
## }
| Function | Purpose | Example |
|---|---|---|
help.start |
Access help on getting started | help.start() |
? |
Access help files | ?boxplot |
# |
Add comments | #add your comments here |
boxplot |
Makes a boxplot | boxplot(count \~ spray, data = InsectSprays, col = "lightgray") |
sqrt |
Square root | sqrt(2) |
log |
Natural logarithm | log(2) |
seq |
Sequence of numbers | seq(1,5, by=.5) |
max |
Maximum | max(1,2,3,4,5) |
min |
Minimum | min(1,2,3,4,5) |
mean |
Mean | mean(x) |
sd |
Standard deviation | sd(1:2) \^ 2 |
install.packages |
Install package | install.packages("MASS") |
library |
load package | library(MASS) |
q |
quit R | q() |
citation |
citation for R | citation() |
# Store five values in single-letter variables.
# NOTE(review): a variable named `c` shares its name with base R's c()
# function; descriptive names such as Wing1 below are easier to follow.
a <- 59
b <- 55
c <- 53.5
d <- 55
e <- 52.5
# Typing a variable name prints its value.
a
## [1] 59
# The same five values stored under descriptive names.
Wing1 <- 59
Wing2 <- 55
Wing3 <- 53.5
Wing4 <- 55
Wing5 <- 52.5
# Variables can be used directly in calculations.
sqrt(Wing1)
## [1] 7.681146
2*Wing1
## [1] 118
# Sum of the five values, then their mean.
Wing1 + Wing2 + Wing3 + Wing4 + Wing5
## [1] 275
(Wing1 + Wing2 + Wing3 + Wing4 + Wing5)/5
## [1] 55
c() function
Wingcrd <- c(59,55,53.5,55,52.5,57.5,53,55)
# Print the whole vector.
Wingcrd
## [1] 59.0 55.0 53.5 55.0 52.5 57.5 53.0 55.0
# Square brackets select elements: a single index ...
Wingcrd[1]
## [1] 59
# ... a range of indices ...
Wingcrd[1:5]
## [1] 59.0 55.0 53.5 55.0 52.5
# ... or a negative index, which returns everything except that element.
Wingcrd[-2]
## [1] 59.0 53.5 55.0 52.5 57.5 53.0 55.0
# Total of all elements; the result can be stored in a new variable.
sum(Wingcrd)
## [1] 440.5
S.win <-sum(Wingcrd)
S.win
## [1] 440.5
# Three more vectors of length 8; Head has a missing value (NA) at the end.
Tarsus <- c(22.3,19.7,20.8,20.3,20.8,21.5,20.6,21.5)
Head <- c(31.2,30.4,30.6,30.3,30.3,30.8,32.5,NA)
Wt <- c(9.5,13.8,14.8,15.2,15.5,15.6,15.6,15.7)
# A missing value makes the sum NA ...
sum(Head)
## [1] NA
# ... unless missing values are removed first with na.rm=TRUE.
sum(Head,na.rm=TRUE)
## [1] 216.1
c, cbind, rbind
BirdData <- c(Wingcrd, Tarsus, Head, Wt) #contains the original variables in vectors
BirdData
## [1] 59.0 55.0 53.5 55.0 52.5 57.5 53.0 55.0 22.3 19.7 20.8 20.3 20.8 21.5 20.6
## [16] 21.5 31.2 30.4 30.6 30.3 30.3 30.8 32.5 NA 9.5 13.8 14.8 15.2 15.5 15.6
## [31] 15.6 15.7
BirdData[3]
## [1] 53.5
BirdData is a single vector of length 32
[1] is the index number of the first element
[3] is the third element
Z <- cbind(Wingcrd,Tarsus,Head,Wt) #contains the original variables in columns
Z
## Wingcrd Tarsus Head Wt
## [1,] 59.0 22.3 31.2 9.5
## [2,] 55.0 19.7 30.4 13.8
## [3,] 53.5 20.8 30.6 14.8
## [4,] 55.0 20.3 30.3 15.2
## [5,] 52.5 20.8 30.3 15.5
## [6,] 57.5 21.5 30.8 15.6
## [7,] 53.0 20.6 32.5 15.6
## [8,] 55.0 21.5 NA 15.7
dim(Z)
## [1] 8 4
Z2 <- rbind(Wingcrd,Tarsus,Head,Wt) #combines the data in rows
Z2
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## Wingcrd 59.0 55.0 53.5 55.0 52.5 57.5 53.0 55.0
## Tarsus 22.3 19.7 20.8 20.3 20.8 21.5 20.6 21.5
## Head 31.2 30.4 30.6 30.3 30.3 30.8 32.5 NA
## Wt 9.5 13.8 14.8 15.2 15.5 15.6 15.6 15.7
dim(Z2)
## [1] 4 8
# Create an empty 8 x 4 matrix; every cell starts as NA.
Dmat <- matrix(nrow = 8, ncol = 4)
Dmat
## [,1] [,2] [,3] [,4]
## [1,] NA NA NA NA
## [2,] NA NA NA NA
## [3,] NA NA NA NA
## [4,] NA NA NA NA
## [5,] NA NA NA NA
## [6,] NA NA NA NA
## [7,] NA NA NA NA
## [8,] NA NA NA NA
# Fill the matrix one column at a time.
Dmat[, 1] <- c(59, 55, 53.5, 55, 52.5, 57.5, 53, 55)
Dmat[, 2] <- c(22.3, 19.7, 20.8, 20.3, 20.8, 21.5, 20.6, 21.5)
Dmat[, 3] <- c(31.2, 30.4, 30.6, 30.3, 30.3, 30.8, 32.5, NA)
Dmat[, 4] <- c(9.5, 13.8, 14.8, 15.2, 15.5, 15.6, 15.6, 15.7)
Dmat
## [,1] [,2] [,3] [,4]
## [1,] 59.0 22.3 31.2 9.5
## [2,] 55.0 19.7 30.4 13.8
## [3,] 53.5 20.8 30.6 14.8
## [4,] 55.0 20.3 30.3 15.2
## [5,] 52.5 20.8 30.3 15.5
## [6,] 57.5 21.5 30.8 15.6
## [7,] 53.0 20.6 32.5 15.6
## [8,] 55.0 21.5 NA 15.7
# Label the columns; the first label is spelled "Wingcrd" so that it matches
# the name of the Wingcrd vector holding the same values.
colnames(Dmat) <- c("Wingcrd", "Tarsus", "Head", "Wt")
Dmat
## Wingcrd Tarsus Head Wt
## [1,] 59.0 22.3 31.2 9.5
## [2,] 55.0 19.7 30.4 13.8
## [3,] 53.5 20.8 30.6 14.8
## [4,] 55.0 20.3 30.3 15.2
## [5,] 52.5 20.8 30.3 15.5
## [6,] 57.5 21.5 30.8 15.6
## [7,] 53.0 20.6 32.5 15.6
## [8,] 55.0 21.5 NA 15.7
Dmat2 <- as.matrix(cbind(Wingcrd,Tarsus,Head,Wt))
Dmat2
## Wingcrd Tarsus Head Wt
## [1,] 59.0 22.3 31.2 9.5
## [2,] 55.0 19.7 30.4 13.8
## [3,] 53.5 20.8 30.6 14.8
## [4,] 55.0 20.3 30.3 15.2
## [5,] 52.5 20.8 30.3 15.5
## [6,] 57.5 21.5 30.8 15.6
## [7,] 53.0 20.6 32.5 15.6
## [8,] 55.0 21.5 NA 15.7
Dfrm <- data.frame(WC=Wingcrd,TS=Tarsus,HD=Head,W=Wt)
Dfrm
## WC TS HD W
## 1 59.0 22.3 31.2 9.5
## 2 55.0 19.7 30.4 13.8
## 3 53.5 20.8 30.6 14.8
## 4 55.0 20.3 30.3 15.2
## 5 52.5 20.8 30.3 15.5
## 6 57.5 21.5 30.8 15.6
## 7 53.0 20.6 32.5 15.6
## 8 55.0 21.5 NA 15.7
Dfrm <- data.frame(WC=Wingcrd,TS=Tarsus,HD=Head,W=Wt,Wsq=sqrt(Wt))
Dfrm
## WC TS HD W Wsq
## 1 59.0 22.3 31.2 9.5 3.082207
## 2 55.0 19.7 30.4 13.8 3.714835
## 3 53.5 20.8 30.6 14.8 3.847077
## 4 55.0 20.3 30.3 15.2 3.898718
## 5 52.5 20.8 30.3 15.5 3.937004
## 6 57.5 21.5 30.8 15.6 3.949684
## 7 53.0 20.6 32.5 15.6 3.949684
## 8 55.0 21.5 NA 15.7 3.962323
.csv file format
read.table or read.csv function
#test <- read.csv("test1.csv",header=TRUE,sep=",")
| Function | Purpose | Example |
sum |
calculates the sum | sum(x, na.rm= TRUE) |
median |
calculates the median | median(x, na.rm= TRUE) |
max |
calculates the maximum | max(x, na.rm= TRUE) |
min |
calculates the minimum | min(x, na.rm= TRUE) |
c |
concatenate data | c(1,2,3) |
cbind |
combine variables in columns | cbind(x,y,z) |
rbind |
combine variables in rows | rbind(x,y,z) |
vector |
combine data in a vector | vector(length=10) |
matrix |
combine data in a matrix | matrix(nrow=5,ncol=10) |
data.frame |
combine data in a data frame | data.frame(x=x,y=y,z=z) |
rep |
repeat values or variables | rep(c(1,2,3),each=10) |
seq |
create a sequence of numbers | seq(1,10) |
dim |
dimension of a matrix or cbind output | dim(Mydata) |
colnames |
column names of a matrix or cbind output | colnames(Mydata) |
rownames |
row names of a matrix or cbind output | rownames(Mydata) |
read.csv |
read data from a .csv file | read.csv(file="test.csv",header=TRUE,sep=",") |
# Make sure pls is available: install it only when it cannot be found.
if (isFALSE(requireNamespace("pls", quietly = TRUE))) {
  install.packages("pls")
}
# Attach pls to the search path.
library("pls")
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
gasoline
gasoline - A data set consisting of octane number
(octane) and NIR spectra (NIR) of 60 gasoline samples. Each NIR spectrum
consists of 401 diffuse reflectance measurements from 900 to 1700 nm
dim(gasoline)
## [1] 60 2
names(gasoline)
## [1] "octane" "NIR"
str function
str(gasoline)
## 'data.frame': 60 obs. of 2 variables:
## $ octane: num 85.3 85.2 88.5 83.4 87.9 ...
## $ NIR : 'AsIs' num [1:60, 1:401] -0.0502 -0.0442 -0.0469 -0.0467 -0.0509 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:60] "1" "2" "3" "4" ...
## .. ..$ : chr [1:401] "900 nm" "902 nm" "904 nm" "906 nm" ...
$ sign
gasoline$octane
## [1] 85.30 85.25 88.45 83.40 87.90 85.50 88.90 88.30 88.70 88.45 88.75 88.25
## [13] 87.30 88.00 88.70 85.50 88.65 88.75 85.40 88.60 87.00 87.15 87.05 87.25
## [25] 86.85 88.65 86.60 86.00 86.10 86.50 86.30 84.40 84.70 84.60 84.50 88.10
## [37] 85.25 88.40 88.20 88.40 88.55 88.35 88.20 85.30 88.50 88.25 88.00 88.85
## [49] 88.45 88.70 88.10 87.60 88.35 85.10 85.10 84.70 87.20 86.60 89.60 87.10
attach adds gasoline to the search path of R
attach(gasoline)
octane
## [1] 85.30 85.25 88.45 83.40 87.90 85.50 88.90 88.30 88.70 88.45 88.75 88.25
## [13] 87.30 88.00 88.70 85.50 88.65 88.75 85.40 88.60 87.00 87.15 87.05 87.25
## [25] 86.85 88.65 86.60 86.00 86.10 86.50 86.30 84.40 84.70 84.60 84.50 88.10
## [37] 85.25 88.40 88.20 88.40 88.55 88.35 88.20 85.30 88.50 88.25 88.00 88.85
## [49] 88.45 88.70 88.10 87.60 88.35 85.10 85.10 84.70 87.20 86.60 89.60 87.10
detach removes gasoline from the search path of R
detach(gasoline)
octane
Error: object 'octane' not found
writes the gasoline file in a .csv format and will be
saved in the directory of choice
#write.csv(gasoline,file="gasoline.csv",quote=FALSE)
| Function | Purpose | Example |
write.csv |
writes and saves the variables in .csv | write.csv(gasoline,file="test.csv",quote=FALSE) |
attach |
make a variable inside a data frame available | attach(gasoline) |
str |
shows the internal structure of an object | str(gasoline) |
$ |
used to access a variable | gasoline\$octane |
names |
provides the names of the variables | names(gasoline) |
dim |
provides the dimensions of the variables | dim(x) |
detach |
make a variable inside a data frame unavailable | detach(gasoline) |
# Default scatterplot of the built-in cars data with a LOWESS smoother on top.
plot(cars)
lines(lowess(cars))
# Same data with a different plotting symbol, a title and fixed axis ranges.
# The title names the two plotted quantities: cars holds stopping distance
# and speed, so the plot shows distance against speed.
plot(cars, pch = 17, main = "distance vs speed",
     xlim = c(5, 15), ylim = c(0, 80))
lines(lowess(cars))
# Add colour to the points and to the smoother.
plot(cars, pch = 17, main = "distance vs speed",
     xlim = c(5, 15), ylim = c(0, 80), col = 4)
lines(lowess(cars), col = 2)
# cex scales the plotting symbols (here to five times the default size).
plot(cars, pch = 17, cex = 5, main = "distance vs speed",
     xlim = c(5, 15), ylim = c(0, 80), col = 4)
| Function | Purpose | Example |
plot |
plots y versus x | plot(x,y,xlab="x label",xlim=c(0,1),ylim=c(0,2),pch=1,main="Main",ylab="Y label",col=1) |
lines |
adds lines to an existing graph | lines(x,y,col=1) |
loess |
apply LOESS smoothing | loess(y\~x) |
# Make sure plotrix is available: install it only when it cannot be found.
if (isFALSE(requireNamespace("plotrix", quietly = TRUE))) {
  install.packages("plotrix")
}
# Attach plotrix to the search path.
library("plotrix")
# Slice sizes and their country labels.
slices <- c(10, 12, 4, 16, 8)
lbls <- c("US", "UK", "Australia", "Germany", "France")
# 3D pie chart with the plain country labels.
pie3D(slices, labels = lbls, explode = 0.1,
      main = "Pie Chart of Countries ")
# Share of each slice as a whole-number percentage.
pct <- round(slices / sum(slices) * 100)
# Append the percentage and a % sign to each label, e.g. "US 20%".
lbls <- paste0(lbls, " ", pct, "%")
# Same chart, now labelled with country and percentage.
pie3D(slices, labels = lbls, explode = 0.1,
      main = "Pie Chart of Countries ")
# Bar plot of how many cars in mtcars have each number of gears.
counts <- table(mtcars$gear)
barplot(counts, main = "Car Distribution",
        xlab = "Number of Gears")
# Boxplot of MPG by Car Cylinders
boxplot(mpg ~ cyl, data = mtcars, main = "Car Mileage Data",
        xlab = "Number of Cylinders", ylab = "Miles Per Gallon")
# Dot chart of the VADeaths matrix.
dotchart(VADeaths, main = "Death Rates in Virginia - 1940")
# Same data transposed; par() returns the previous settings so they can be
# restored afterwards.
op <- par(xaxs = "i") # 0 -- 100%
dotchart(t(VADeaths), xlim = c(0, 100),
         main = "Death Rates in Virginia - 1940")
par(op) # restore the graphical parameters saved in op
# Scatterplot matrix of the first four iris columns, coloured by species.
pairs(iris[1:4], main = "Anderson's Iris Data -- 3 species",
      pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])
library(graphics)
# Conditioning plot: lat against long for intervals of depth.
coplot(lat ~ long | depth, data = quakes)
# Same plot with four explicitly computed depth intervals laid out in one row.
given.depth <- co.intervals(quakes$depth, number = 4, overlap = .1)
coplot(lat ~ long | depth, data = quakes, given.v = given.depth, rows = 1)
| Function | Purpose | Example |
pie3D |
displays a 3D pie chart with optional labels | pie3D(x,...) |
barplot |
creates a bar plot with vertical or horizontal bars | barplot(height, \...) |
boxplot |
produces box-and-whisker plot(s) of the given (grouped) values | boxplot(x, \...) |
dotchart |
draws a Cleveland dot plot | dotchart(x,...) |
pairs |
a matrix of scatterplots is produced | pairs(x, \...) |
coplot |
produces two variants of the conditioning plots | coplot(formula, data,...) |
Lattice is an add-on package that implements
Trellis graphics (originally developed for S
and S-PLUS) in R.
A powerful and elegant high-level data visualization system, with an emphasis on multivariate data.
Sufficient for typical graphics needs.
Flexible enough to handle most nonstandard requirements
# Make sure lattice is available: install it only when it cannot be found.
if (isFALSE(requireNamespace("lattice", quietly = TRUE))) {
  install.packages("lattice")
}
# Attach lattice to the search path.
library("lattice")
# Make sure mlmRev is available: install it only when it cannot be found.
if (isFALSE(requireNamespace("mlmRev", quietly = TRUE))) {
  install.packages("mlmRev")
}
# Attach mlmRev to the search path.
library("mlmRev")
## Loading required package: lme4
## Loading required package: Matrix
data(Chem97)
head(Chem97)
## lea school student score gender age gcsescore gcsecnt
## 1 1 1 1 4 F 3 6.625 0.3393157
## 2 1 1 2 10 F -3 7.625 1.3393157
## 3 1 1 3 10 F -4 7.250 0.9643157
## 4 1 1 4 10 F -2 7.500 1.2143157
## 5 1 1 5 8 F -1 6.444 0.1583157
## 6 1 1 6 10 F 4 7.750 1.4643157
The dataset records information on students appearing in the 1997 A-level chemistry examination in Britain. We are only interested in the following variables:
score: point score in the A-level exam, with six
possible values (0, 2, 4, 6, 8, 10).
gcsescore: average score in GCSE exams. This is a
continuous score that may be used as a predictor of the A-level
score.
gender: gender of the student.
# Histogram of gcsescore, one panel per A-level score.
histogram(~ gcsescore | factor(score), data = Chem97)
# Density plot of gcsescore per score, with one curve per gender.
densityplot(~ gcsescore | factor(score), data = Chem97, groups = gender,
            plot.points = FALSE, auto.key = TRUE)
# Quantile-quantile plot of gcsescore per score, grouped by gender.
qqmath(~ gcsescore | factor(score), data = Chem97, groups = gender,
       f.value = ppoints(100), auto.key = TRUE,
       type = c("p", "g"), aspect = "xy")
# Box-and-whisker plot of gcsescore for each score, one panel per gender.
bwplot(factor(score) ~ gcsescore | gender, data = Chem97)
# Strip plot of earthquake depth for each magnitude.
stripplot(depth ~ factor(mag), data = quakes,
          jitter.data = TRUE, alpha = 0.6,
          main = "Depth of earthquake epicenters by magnitude",
          xlab = "Magnitude (Richter)",
          ylab = "Depth (km)")
# 3D scatter plot. In the formula z ~ x * y the first right-hand term is the
# x axis, so lat is labelled on x and long on y.
cloud(depth ~ lat * long, data = quakes,
      zlim = rev(range(quakes$depth)),
      screen = list(z = 105, x = -70), panel.aspect = 0.75,
      xlab = "Latitude", ylab = "Longitude", zlab = "Depth")
# Dot plot of the VADeaths matrix with points joined by lines.
dotplot(VADeaths, type = "o",
        auto.key = list(points = TRUE, lines = TRUE, space = "right"))
| Function | Purpose | Example |
histogram |
draw histograms | histogram(x, data, \...) |
densityplot |
draw Kernel Density Plots | densityplot(x, data, \...) |
qqmath |
draw quantile-Quantile plots of a sample against a theoretical distribution | qqmath(x, data, \...) |
bwplot |
produces box-and-whisker plots | bwplot(x, data, \...) |
stripplot |
produces strip plot | stripplot(\...) |
cloud |
generic functions to draw 3d scatter plots and surfaces. | cloud(x, data, \...) |
dotplot |
produces dot plot | dotplot(\...) |
Errors in the source file (no space)
Decimal point or comma separation
Directory names and folder location
Incorrect directory names and folder location
Avoid non-English alphabetical characters
Did not save R workspace
# Write this session's command history to "myfile"
# (savehistory() uses ".Rhistory" when no file is given).
savehistory("myfile")
# Store the workspace in the file .RData in the current working directory.
save.image(file = ".RData")