Packages

# Install the packages that provide the functions and data used below.
# A package only needs to be installed once per machine, so skip any package
# that is already available instead of downloading it again on every run.
for (pkg in c("openintro", "mosaic")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load and attach a package. Need to load package every time you open a new R project/session/document.
library(openintro)
library(mosaic)

Data

# Load a data set from the openintro package
data(bdims)  # replace 'bdims' with the name of the data set you want
# open the help page with information on the data set
?bdims
# print/show the variable (column) names
names(bdims)
# print/show the first six observations
head(bdims)
# print/show the last six observations
tail(bdims)
# take the mean of one variable; `$` picks a single variable out of the data set
mean(bdims$wgt)

Summary Statistics

# mean
mean(bdims$wgt)
# median
median(bdims$wgt)
# standard deviation
sd(bdims$wgt)
# interquartile range (lowercase iqr() is provided by mosaic; base R spells it IQR())
iqr(bdims$wgt)
# many summary statistics at once
favstats(bdims$wgt)
# the same summary statistics computed separately for each value of sex
favstats(wgt ~ sex, data = bdims)
# correlation between two numerical variables
cor(bdims$wgt, bdims$hgt)

Data Visualization

# scatterplot (base graphics), written as response ~ explanatory
plot(wgt ~ hgt, data = bdims)
# scatterplot with the points marked by group in a single panel
# (lattice graphics, attached when mosaic is loaded).
# The argument is named `groups`; it is spelled out in full so the call does
# not depend on R's partial argument matching.
xyplot(wgt ~ hgt, groups = sex, data = bdims,
       auto.key = list(text = c("Female", "Male")))
# scatterplot split into one panel per group (`| sex` conditions on sex)
xyplot(wgt ~ hgt | sex, data = bdims)
# histogram by group, one panel per group
histogram( ~ wgt | sex, data = bdims)
# boxplot of a single variable
boxplot(bdims$wgt)
# boxplot by group
boxplot(wgt ~ sex, data = bdims, xlab = "Sex (0 = Female, 1 = Male)")
# bargraph of the counts in each category
bargraph( ~ sex, data = bdims)

Numerical response vs. numerical explanatory

# regression

# fit linear regression model, written as response ~ explanatory
model <- lm(hgt ~ wgt, data = bdims)
# view model summary
summary(model)
# plot model graphics
# Arrange the plots on one 2x2 output. par() returns the settings it replaces,
# so keep them and restore them afterwards; otherwise every later plot in the
# session would also be squeezed into a 2x2 grid.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)

Numerical response vs. categorical explanatory

# difference between two means: t test comparing hgt across the two sex groups
t.test(hgt ~ sex, data = bdims)

# anova model
# Template: y, groups and datasetname are placeholders (not defined in this
# file); replace them with your response variable, grouping variable and data set.
# Note this reuses the name `model`, replacing any model stored under it before.
model <- aov(y ~ groups, data = datasetname)
# anova table
summary(model)
# pairwise comparisons of the group means (Tukey's honest significant differences)
# NOTE(review): the grouping variable presumably has to be a factor here - confirm
TukeyHSD(model)
# plot model graphics
plot(model)

Categorical response vs. categorical explanatory

# make a two-way table of counts
# Template: datasetname, group1 and group2 are placeholders; replace them with
# your data set and its two categorical variables.
table1 <- table(datasetname$group1, datasetname$group2)
# divide cell counts by row totals (each row sums to 1)
prop.table(table1, margin = 1)
# divide cell counts by column totals (each column sums to 1)
prop.table(table1, margin = 2)
# counts for two categorical variables
# The legend argument of barplot() is named `legend.text`; it is spelled out in
# full so the call does not depend on R's partial argument matching.
barplot(table1, legend.text = TRUE)
# proportions using column totals
barplot(prop.table(table1, margin = 2), legend.text = TRUE)
# chi square test of independence (correct = FALSE turns off the continuity correction)
chisq.test(table1, correct = FALSE)  # table1 is a two way table object
# test for difference in two proportions
# Template: x1, x2 are the two success counts and n1, n2 the two sample sizes.
prop.test(c(x1, x2), c(n1, n2), correct = FALSE)

Distributions

# probability function (normal): area to the left of 80 under a normal curve
# with mean 100 and standard deviation 15
xpnorm(80, mean = 100, sd = 15)
pdist("norm", 80, mean = 100, sd = 15)
# several cut-off values can be given at once
pdist("norm", c(75, 80, 110), mean = 100, sd = 15)
# probability function (chi-square)
pdist("chisq", 4, df = 3)
# probability function (F)
pdist("f", 4, df1 = 5, df2 = 20)
# probability function (t)
pdist("t", 3, df = 5)
# quantile calculation (normal): value with 95% of the area to its left
xqnorm(0.95, mean = 100, sd = 15)
qdist("norm", .95, mean = 100, sd = 15)
# several probabilities can be given at once
qdist("norm", c(.90, .95), mean = 100, sd = 15)
# quantile calculation (chi-square)
# qdist() takes the distribution name, the probability, and then the
# distribution's own parameters (here only df)
qdist("chisq", .95, df = 3)
# quantile calculation (F)
qdist("f", .95, df1 = 5, df2 = 20)
# quantile calculation (t)
qdist("t", .90, df = 6)