# Chapter 3: Histograms

### 3.1 Creating a Basic Histogram

# Use a margin of 3 lines on every side of the figure
# (order: bottom, left, top, right).
par(mar = rep(3, 4))

# str() shows the 'structure' of the data: PlantGrowth has a numeric
# 'weight' column and a three-level factor 'group'.
str(PlantGrowth)
#> 'data.frame':    30 obs. of  2 variables:
#>  $ weight: num  4.17 5.58 5.18 6.11 4.5 4.61 5.17 4.53 5.33 5.14 ...
#>  $ group : Factor w/ 3 levels "ctrl","trt1",..: 1 1 1 1 1 1 1 1 1 1 ...

# Histogram of the weight column with all of hist()'s defaults; with() lets
# us refer to the column by its bare name.
with(
  PlantGrowth,
  hist(weight)
)

### 3.2 Optimal Parameters

# Lay out two panels side by side (1 row, 2 columns) and widen the margins.
par(mfrow = c(1, 2), mar = c(5, 4, 4, 4))

with(PlantGrowth, {
  # Left panel: the default histogram, for comparison.
  hist(weight)

  # Right panel: the same data with the optional arguments spelled out.
  hist(
    weight,
    xlim   = c(3, 7),                      # x-axis range
    ylim   = c(0, 10),                     # y-axis range
    col    = "lightgrey",                  # fill colour of the columns
    border = "black",                      # outline colour of the columns
    main   = "Histogram of Plant Weights", # figure title
    xlab   = "Dried weight (g)",           # x-axis label
    ylab   = "Frequency"                   # y-axis label
  )
})

### 3.3 Changing Bin Sizes

# 1. Simulate data:

# Fix the random seed so the draw is reproducible (everyone gets exactly the
# same numbers).
set.seed(1234)

# Draw 1000 observations from a normal distribution with mean 500 and sd 50,
# stored as the single column 'var_name' of a data frame.
simulated_data <- data.frame(var_name = rnorm(n = 1000, mean = 500, sd = 50))
str(simulated_data)
#> 'data.frame':    1000 obs. of  1 variable:
#>  $ var_name: num  440 514 554 383 521 ...

# 2. Compare binning rules on a 2 x 2 grid of panels:
par(mfrow = c(2, 2), mar = c(5, 4, 4, 4))

with(simulated_data, {
  # Sturges' rule is what hist() uses when 'breaks' is not given.
  hist(var_name, breaks = "Sturges", main = "Default bins")
  hist(var_name, breaks = "Scott", main = "Scott bins")
  # "FD" selects the Freedman-Diaconis rule.
  hist(var_name, breaks = "FD", main = "FD bins")
  # Explicit break points: one bin edge every 10 units from 100 to 700.
  hist(
    var_name,
    breaks = seq(from = 100, to = 700, by = 10),
    main = "Custom bins"
  )
})

### 3.4 Advanced Histogram Features

# Calculate the mean and SD of plant weight once and save them as objects so
# they can be reused when drawing the vertical reference lines below.
mean.weight <- with(PlantGrowth, mean(weight))
sd.weight <- with(PlantGrowth, sd(weight))
mean.weight
#> [1] 5.073
sd.weight  # see how the mean and sd have now been stored?
#> [1] 0.7011918

# Reset the layout to a single panel as well as setting the margins. par()
# settings persist on the graphics device, so any multi-panel layout left
# over from an earlier par(mfrow = ...) call would otherwise still be active
# and this figure would be drawn in one small panel of that grid.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 4))

# Plot the histogram. xaxs = "i" and yaxs = "i" make the axes span exactly
# xlim and ylim, without the default padding at each end.
with(
  PlantGrowth,
  hist(weight, xlim = c(3, 8), ylim = c(0, 10), xaxs = "i", yaxs = "i")
)

# Add vertical lines for the mean and for 1 SD above and below the mean.
abline(v = mean.weight, lty = 1, lwd = 3, col = "blue")
abline(v = mean.weight - sd.weight, lty = 2, lwd = 1, col = "red")
abline(v = mean.weight + sd.weight, lty = 2, lwd = 1, col = "red")

# Add the legend; col, lwd and lty are given in the same order as the
# entries in 'legend' so that each key matches its line.
legend(
  "topright",                         # location
  legend = c("Mean", "Mean +/- SD"),  # contents of the legend
  col = c("blue", "red"),             # colours, in order of legend contents
  lwd = c(3, 1),                      # line widths, in order
  lty = c(1, 2)                       # line types, in order
)