# Set all four figure margins to three lines before the first plot.
par(mar = rep(3, times = 4))

# Print the structure of the PlantGrowth data frame.
str(object = PlantGrowth)
#> 'data.frame': 30 obs. of 2 variables:
#> $ weight: num 4.17 5.58 5.18 6.11 4.5 4.61 5.17 4.53 5.33 5.14 ...
#> $ group : Factor w/ 3 levels "ctrl","trt1",..: 1 1 1 1 1 1 1 1 1 1 ...

# Default histogram of the weights. Evaluating hist() inside with() means the
# automatic title and x-axis label are built from the bare name "weight".
with(
  data = PlantGrowth,
  expr = hist(x = weight)
)
### 3.2 Optional Parameters
# Two panels side by side: the default histogram on the left and a
# customised version of the same data on the right.
par(mfrow = c(1, 2), mar = c(5, 4, 4, 4))

# Left panel: defaults only.
with(PlantGrowth, hist(weight))

# Right panel: the same histogram with the optional arguments spelled out.
with(
  PlantGrowth,
  hist(
    weight,
    main   = "Histogram of Plant Weights", # figure title
    xlab   = "Dried weight (g)",           # x-axis label
    ylab   = "Frequency",                  # y-axis label
    xlim   = c(3, 7),                      # x-axis range
    ylim   = c(0, 10),                     # y-axis range
    col    = "lightgrey",                  # bar fill colour
    border = "black"                       # bar outline colour
  )
)
### 3.3 Changing Bin Sizes
# 1. Simulate data.
# Fix the RNG seed so the simulated values are the same on every run.
set.seed(1234)
# Draw 1000 observations from a normal distribution (mean 500, sd 50) and
# store them in a one-column data frame whose column is named 'var_name'.
simulated_data <- data.frame(var_name = rnorm(n = 1000, mean = 500, sd = 50))
str(simulated_data)
#> 'data.frame': 1000 obs. of 1 variable:
#> $ var_name: num 440 514 554 383 521 ...

# 2. Compare binning choices on a 2 x 2 grid of panels.
par(mfrow = c(2, 2), mar = c(5, 4, 4, 4))
with(simulated_data, {
  # Named rules: "Sturges" is the one hist() uses by default.
  hist(var_name, breaks = "Sturges", main = "Default bins")
  hist(var_name, breaks = "Scott", main = "Scott bins")
  hist(var_name, breaks = "FD", main = "FD bins")
  # Explicit break points every 10 units from 100 to 700.
  hist(var_name, breaks = seq(100, 700, by = 10), main = "Custom bins")
})
### 3.4 Advanced Histogram Features
# Compute the mean and standard deviation of the plant weights once and keep
# them as objects so the vertical reference lines below can reuse them.
mean.weight <- with(PlantGrowth, mean(weight))
sd.weight <- with(PlantGrowth, sd(weight))
mean.weight
#> [1] 5.073
sd.weight # the mean and sd are now stored and can be referred to by name
#> [1] 0.7011918

# Return to a single-panel layout before plotting. A multi-panel layout set
# by an earlier par(mfrow = ...) call persists on the device, so setting only
# the margins here would squeeze this histogram into one panel of that grid.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 4))

# Plot the histogram; xaxs/yaxs = "i" make the axes span exactly xlim/ylim
# with no extra padding.
with(PlantGrowth, hist(weight, xlim = c(3, 8), ylim = c(0, 10),
                       xaxs = "i", yaxs = "i"))

# Add vertical lines for the mean (solid blue) and for one sd below and above
# the mean (dashed red).
abline(v = mean.weight, lty = 1, lwd = 3, col = "blue")
abline(v = mean.weight - sd.weight, lty = 2, lwd = 1, col = "red")
abline(v = mean.weight + sd.weight, lty = 2, lwd = 1, col = "red")

# Add the legend; each vector is given in the same order as the labels.
legend("topright",                          # location
       legend = c("Mean", "Mean +/- SD"),   # legend entries
       col = c("blue", "red"),              # line colours
       lwd = c(3, 1),                       # line widths
       lty = c(1, 2))                       # line types