## Load Data
# NOTE(review): absolute, machine-specific path -- the script only runs where
# the file exists at exactly this location.
trees <- read.csv("C:/Users/Claudio Alvarez/Desktop/livetrees.csv")
## SD
# Standard deviation of dbh over every row of the table; NAs are dropped.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
sd(trees$dbh, na.rm = TRUE)
## [1] 16.02
## Histogram
hist(trees$dbh)
## crude map
# One circle per tree at (x, y) with radius dbh / 20. inches = FALSE keeps the
# radii in axis units instead of rescaling them to inches on the device.
symbols(trees$x, trees$y, circles = trees$dbh/20, inches = FALSE)
## random sample subsets of 100 and 500 trees
# No seed is set in the visible code, so every run draws different rows.
# sample.int(n, k) draws k distinct indices from 1..n directly, without first
# materialising the 1:n vector.
samp.100.rows <- sample.int(nrow(trees), 100) # row indices of the 100-tree sample
trees.samp1 <- trees[samp.100.rows, ] # selected rows, all columns
samp.500.rows <- sample.int(nrow(trees), 500) # row indices of the 500-tree sample
trees.samp2 <- trees[samp.500.rows, ] # selected rows, all columns
# redraw the full map, then overlay the 100 sampled locations as red dots
symbols(trees$x, trees$y, circles = trees$dbh/20, inches = FALSE)
points(trees.samp1$x, trees.samp1$y, pch = 16, cex = 1, col = "red")
# Compare population statistics with the two random samples. The sample
# figures recorded under each call come from one particular run and will
# differ from run to run unless a seed is fixed before sampling.
# MEANS
mean(trees$dbh, na.rm = TRUE) # mean of the population
## [1] 11.72
mean(trees.samp2$dbh, na.rm = TRUE) # mean of the 500 tree sample
## [1] 11.09
# SD
sd(trees$dbh, na.rm = TRUE) # SD of the population
## [1] 16.02
sd(trees.samp1$dbh, na.rm = TRUE) # SD of the 100 tree sample
## [1] 19.84
sd(trees.samp2$dbh, na.rm = TRUE) # SD of the 500 tree sample
## [1] 15.48
# Chance error
# Computed as population mean minus the 100-tree sample mean, so a negative
# value means the sample mean came out above the population mean.
mean(trees$dbh, na.rm = TRUE) - mean(trees.samp1$dbh, na.rm = TRUE)
## [1] -2.243
The average magnitude of these chance errors depends on sample size: larger samples tend to produce smaller chance errors. In the extreme case, if our sample included 100% of the trees, the chance error would be zero.
# Repeated-sampling experiment: for each sample size, draw many random samples
# of dbh (without replacement) and record each sample's mean and SD, then
# compare the spread of the sample means across sample sizes.

# choose a range of sample sizes to explore
ss <- c(10, 50, 100, 500, 1000, 1100)
# number of independent samples drawn for each sample size
nReps <- 10000

# sample() cannot draw more values than exist when sampling without
# replacement; check up front rather than failing after the smaller sizes
# have already been simulated.
if (max(ss) > nrow(trees)) {
  stop("largest sample size exceeds the number of trees", call. = FALSE)
}

# Build one data frame per sample size and stack them once at the end instead
# of growing `results` with rbind() on every pass. Samples are drawn in the
# same order as a plain nested loop over ss and 1..nReps.
results <- do.call(rbind, lapply(ss, function(size) {
  # 2 x nReps matrix: row 1 holds the sample means, row 2 the sample SDs
  stats <- vapply(seq_len(nReps), function(j) {
    aSample <- sample(trees$dbh, size)
    c(mean(aSample, na.rm = TRUE), sd(aSample, na.rm = TRUE))
  }, numeric(2))
  data.frame(sampleMean = stats[1, ], sampleSD = stats[2, ], sampleSize = size)
}))

# Treat sample size as a category, keeping the levels in the order of `ss`
# so the boxes appear from smallest to largest sample size.
results$sampleSize <- factor(results$sampleSize, levels = ss)

# One box per sample size. The y axis shows the sample means themselves; the
# height of each box reflects how much those means vary (the standard error).
boxplot(sampleMean ~ sampleSize, data = results,
        xlab = "Sample Size", ylab = "Sample Mean",
        main = "Standard Errors and Sample Size")
The plot above shows how the standard error declines as a function of sample size. If we believe that the “truth” lies somewhere within the range of sample means we observed, we can interpret this plot as showing that uncertainty about the population mean decreases as sample size increases. The amount of variation in the forest does not change just because our sample size changes. However, changes in sample size do change our level of confidence in our knowledge of the forest.