HABITAT <- factor(c("Mixed", "Gipps.Manna", "Gipps.Manna",
"Gipps.Manna", "Mixed", "Mixed", "Mixed", "Mixed"))
HABITAT
## [1] Mixed Gipps.Manna Gipps.Manna Gipps.Manna Mixed Mixed
## [7] Mixed Mixed
## Levels: Gipps.Manna Mixed
GST <- c(3.4, 3.4, 8.4, 3, 5.6, 8.1, 8.3, 4.6)
EYR <- c(0, 9.2, 3.8, 5, 5.6, 4.1, 7.1, 5.3)
MACNALLY <- data.frame(HABITAT, GST, EYR)
MACNALLY
## HABITAT GST EYR
## 1 Mixed 3.4 0.0
## 2 Gipps.Manna 3.4 9.2
## 3 Gipps.Manna 8.4 3.8
## 4 Gipps.Manna 3.0 5.0
## 5 Mixed 5.6 5.6
## 6 Mixed 8.1 4.1
## 7 Mixed 8.3 7.1
## 8 Mixed 4.6 5.3
row.names(MACNALLY) <- c("Reedy Lake", "Pearcedale", "Warneet","Cranbourne", "Lysterfield", "Red Hill", "Devilbend", "Olinda")
MACNALLY
## HABITAT GST EYR
## Reedy Lake Mixed 3.4 0.0
## Pearcedale Gipps.Manna 3.4 9.2
## Warneet Gipps.Manna 8.4 3.8
## Cranbourne Gipps.Manna 3.0 5.0
## Lysterfield Mixed 5.6 5.6
## Red Hill Mixed 8.1 4.1
## Devilbend Mixed 8.3 7.1
## Olinda Mixed 4.6 5.3
Si su base de datos esta en alguna unidad de su computadora, puede subir el archivo cvs utiliando el comando read.table:
MACNALLY <- read.table(‘macnally.csv’, header=T, row.names=1, sep=‘,’)
Extraer de la base de datos la variable GST que sean mayores a 3
subset(MACNALLY, GST<3)
## [1] HABITAT GST EYR
## <0 rows> (or 0-length row.names)
Analice los siguientes comandos y escriba en su cuaderno la utilidad de cada una de las funciones
MACNALLY[1:5,]
## HABITAT GST EYR
## Reedy Lake Mixed 3.4 0.0
## Pearcedale Gipps.Manna 3.4 9.2
## Warneet Gipps.Manna 8.4 3.8
## Cranbourne Gipps.Manna 3.0 5.0
## Lysterfield Mixed 5.6 5.6
MACNALLY['Pearcedale',]
## HABITAT GST EYR
## Pearcedale Gipps.Manna 3.4 9.2
MACNALLY[,c('GST')]
## [1] 3.4 3.4 8.4 3.0 5.6 8.1 8.3 4.6
a<-MACNALLY[MACNALLY$GST>3,]
MACNALLY[MACNALLY$GST>3 & MACNALLY$HABITAT=="Mixed",]
## HABITAT GST EYR
## Reedy Lake Mixed 3.4 0.0
## Lysterfield Mixed 5.6 5.6
## Red Hill Mixed 8.1 4.1
## Devilbend Mixed 8.3 7.1
## Olinda Mixed 4.6 5.3
subset(MACNALLY, GST>3, select=c('GST','EYR'))
## GST EYR
## Reedy Lake 3.4 0.0
## Pearcedale 3.4 9.2
## Warneet 8.4 3.8
## Lysterfield 5.6 5.6
## Red Hill 8.1 4.1
## Devilbend 8.3 7.1
## Olinda 4.6 5.3
MACNALLY$GST
## [1] 3.4 3.4 8.4 3.0 5.6 8.1 8.3 4.6
MACNALLY[MACNALLY$HABITAT %in% c("Montane Forest","Foothills Woodland"),]
## [1] HABITAT GST EYR
## <0 rows> (or 0-length row.names)
En algunas ocaciones es necesario calcular estad?sticas de un vector por separado.
tapply(MACNALLY$GST, MACNALLY$HABITAT, mean)
## Gipps.Manna Mixed
## 4.933333 6.000000
Cuando nos interesa obtebner estad?sticas de multiples variables al mismo tiempo
aggregate(MACNALLY[c('GST','EYR')], list(Habitat=MACNALLY$HABITAT), mean)
## Habitat GST EYR
## 1 Gipps.Manna 4.933333 6.00
## 2 Mixed 6.000000 4.42
De forma alternativa el paquete “nlme” ejecuta la misma funci?n de una manera relativamente simple
library(nlme)
gsummary(MACNALLY[c("GST", "EYR")], groups = MACNALLY$HABITAT)
## GST EYR
## Gipps.Manna 4.933333 6.00
## Mixed 6.000000 4.42
alt text
alt text
ward <- read.table('ward.csv', header=T, sep=',')
boxplot(EGGS~ZONE, ward)
#pg 143 Biostatistical Design and Analysis Using R
# calculos de promedio y desviacion
ward.means<-with(ward, tapply(EGGS, ZONE, mean))
ward.sds<-with(ward, tapply(EGGS, ZONE, sd))
ward.ns<-with(ward, tapply(EGGS, ZONE, length))
ward.se <- ward.sds/sqrt(ward.ns)
xs <- barplot(ward.means, ylim=range(pretty(c(ward.means+ward.se,
ward.means-ward.se))), axes=F, xpd=F, axisnames=F, axis.lty=2, legend.text=F,
col="gray")
# plot the error bars of the winter-spring season
arrows(xs,ward.means+ward.se,xs,ward.means-ward.se, code=3, angle=90, len=.05)
# create the axes and their labels
axis(2,las=1)
axis(1,at=xs, lab=c("Littorinid","Mussel"), padj=1, mgp=c(0,0,0))
mtext(2,text="Mean number of egg capsules per capsule",line=3, cex=1)
mtext(1,text="Zone",line=3, cex=1)
box(bty="l")
Utilizamos la siguiente base de datos
tg <- ToothGrowth
head(tg)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
# summarySE provides the standard deviation, standard error of the mean, and a (default 95%) confidence interval
tgc <- summarySE(tg, measurevar="len", groupvars=c("supp","dose"))
tgc
## supp dose N len sd se ci
## 1 OJ 0.5 10 13.23 4.459709 1.4102837 3.190283
## 2 OJ 1.0 10 22.70 3.910953 1.2367520 2.797727
## 3 OJ 2.0 10 26.06 2.655058 0.8396031 1.899314
## 4 VC 0.5 10 7.98 2.746634 0.8685620 1.964824
## 5 VC 1.0 10 16.77 2.515309 0.7954104 1.799343
## 6 VC 2.0 10 26.14 4.797731 1.5171757 3.432090
# Standard error of the mean
library(ggplot2)
ggplot(tgc, aes(x=dose, y=len, colour=supp)) +
geom_errorbar(aes(ymin=len-se, ymax=len+se), width=.1) +
geom_line() +
geom_point()