IanMac — Feb 5, 2013, 4:42 PM
# Data frame (iQ) with 350 group 1 members and 650 group 2 members, with the following variables:
# iQ scores (iQ): normally distributed
# Ages (ages): random integers from 5 to 95
# Weights (kg): random uniform values from 50 to 150
# Build the example data frame `iQ` (1000 rows: iQ score, group label, age,
# weight). Each random draw is preceded by its own set.seed() call so the
# data are reproducible.

# Group 1: 350 members, iQ drawn from N(mean = 95, sd = 20)
# (lower mean and dispersion).
set.seed(1)
iq1scores <- rnorm(350, mean = 95, sd = 20)
# Build the group as a data frame rather than cbind()-ing the scores with a
# character label: cbind() would coerce the scores to a character matrix and
# the later as.numeric() round trip would lose precision.
iq1 <- data.frame(iQ = iq1scores, group = "1", stringsAsFactors = FALSE)

# Group 2: 650 members, iQ drawn from N(mean = 105, sd = 25)
# (higher mean and dispersion).
set.seed(2)
iq2scores <- rnorm(650, mean = 105, sd = 25)
iq2 <- data.frame(iQ = iq2scores, group = "2", stringsAsFactors = FALSE)

# Stack both groups; `iQ` stays numeric and `group` stays character.
iQ <- rbind(iq1, iq2)
iQ_avg <- mean(iQ$iQ) # average iQ over both groups

# Ages: random integers between 5 and 95, one per individual.
set.seed(3)
ages <- sample(5:95, nrow(iQ), replace = TRUE)
iQ <- cbind(iQ, ages) # add ages to the existing data frame
ages_avg <- mean(iQ$ages) # average age

# Weights: uniform random numbers between 50 and 150, one per individual.
set.seed(4)
kg <- runif(nrow(iQ), min = 50, max = 150)
iQ <- cbind(iQ, kg) # add weights to the existing data frame
kg_avg <- mean(iQ$kg) # average weight
## Data frame now created, ready for plotting.
# Inspect the first and last six rows. The lines prefixed with `#>` are the
# console output recorded in the original session; they are kept as comments
# so the script can be parsed and sourced.
head(iQ)
#>       iQ group ages     kg
#> 1  82.47     1   20 108.58
#> 2  98.67     1   78  50.89
#> 3  78.29     1   40  79.37
#> 4 126.91     1   34  77.74
#> 5 101.59     1   59 131.36
#> 6  78.59     1   59  76.04
tail(iQ)
#>          iQ group ages     kg
#> 995   92.95     2   16  93.14
#> 996  124.32     2   23 138.00
#> 997   75.27     2   20  82.30
#> 998  114.30     2   52  58.29
#> 999   81.40     2   48  78.04
#> 1000  89.87     2   53  87.54
# Scatterplots of iQ against age

# Large solid blue dots
plot(
  x = iQ$ages, y = iQ$iQ,
  col = "blue", pch = 19, cex = 2
)

# Smaller dots (better for seeing detail)
plot(
  x = iQ$ages, y = iQ$iQ,
  col = "blue", pch = 19, cex = 0.5
)

# Colour each dot by its group label; `group` holds the strings "1" and "2",
# which are passed straight through as the colour specification
plot(
  x = iQ$ages, y = iQ$iQ,
  col = iQ$group, pch = 19, cex = 0.5
)
# Load Hmisc (provides cut2(), used below to bin weights into ranges).
# The `#>` lines are the package start-up messages recorded in the original
# session, kept as comments so the script can be parsed.
library(Hmisc)
#> Loading required package: survival
#> Loading required package: splines
#> Hmisc library by Frank E Harrell Jr
#> Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall
#> documentation.
#> NOTE:Hmisc no longer redefines [.factor to drop unused levels when
#> subsetting. To get the old behavior of Hmisc type dropUnusedLevels().
#> Attaching package: 'Hmisc'
#> The following object(s) are masked from 'package:survival':
#>     untangle.specials
#> The following object(s) are masked from 'package:base':
#>     format.pval, round.POSIXt, trunc.POSIXt, units
# Colour dots by weight range: cut2(..., g = 3) bins kg into three quantile
# groups and the resulting factor is used as the colour.
plot(iQ$ages, iQ$iQ, pch = 19, col = cut2(iQ$kg, g = 3), cex = 1)

# Size each dot by its weight relative to the average weight. The weights are
# read from the data frame column rather than the free-standing `kg` vector so
# that x, y, colour and size all come from the same rows.
plot(iQ$ages, iQ$iQ, pch = 19, col = iQ$group, cex = iQ$kg / kg_avg)

# Horizontal line at the average iQ, spanning the observed age range. Two end
# points are enough; no need to hard-code one point per row.
lines(range(iQ$ages), rep(iQ_avg, 2), col = "green", lwd = 10)

# Extra points along a descending diagonal, for illustration only.
# `length.out` is spelled out in full instead of relying on partial matching
# of `length`.
points(
  seq(5, 95, length.out = 90),
  seq(180, 40, length.out = 90),
  col = "blue", pch = 1, cex = 1
)
# Alternatives for seeing detail when there are a lot of points

# Regular scatterplot, for comparison
plot(iQ$ages, iQ$iQ, pch = 19, cex = 2)

# Smoothed density scatterplot. The `#>` line is the message printed in the
# original session when smoothScatter() loaded KernSmooth; it is kept as a
# comment so the script can be parsed.
smoothScatter(iQ$ages, iQ$iQ, pch = 19, cex = 2)
#> KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009
# Hexagonal binning: shade varies with the count of points in each hexagon.
# The `#>` lines are the load messages recorded in the original session, kept
# as comments so the script can be parsed.
library(hexbin) # load hexbin package
#> Loading required package: grid
#> Loading required package: lattice
plot(hexbin(iQ$ages, iQ$iQ, xbins = 12, xlab = "Age", ylab = "iQ"))
# QQ plot: plots the quantiles of ages against the corresponding quantiles of
# iQ scores
qqplot(
  x = iQ$ages,
  y = iQ$iQ,
  col = "orange"
)
# Visually checking for NAs (missing data)
# Seed the generator so this example is reproducible, like the other random
# draws in this script.
set.seed(5)
x <- rnorm(100)
y <- rnorm(100)
y[x < 0] <- NA # make the y value NA wherever x is negative

# Box plots of x split by whether y is missing: blue = y present, red = y NA.
# Because y was blanked exactly where x < 0, the red box covers only negative x.
boxplot(x ~ is.na(y), xlab = "y value is NA?", col = c("blue", "red"))