The normal distribution in R

Carsten Grube dataZ4s.com

 

This is part of my training for intro to R programming which I publish on my site https://dataz4s.com/. In this document, I will be working with the functions: pnorm(), qnorm(), dnorm() and rnorm().

 

The density function dnorm()

The dnorm() function returns the value of the normal probability density function at each given x-value, which can then be used to plot the density curve.

 

Creating and plotting density

Creating and plotting density of sequence from 50 to 80

# Build the grid of x-values: 50 up to 80 in steps of 0.25, stored in x
x <- seq(50, 80, by = 0.25)
print(x)
##   [1] 50.00 50.25 50.50 50.75 51.00 51.25 51.50 51.75 52.00 52.25 52.50 52.75
##  [13] 53.00 53.25 53.50 53.75 54.00 54.25 54.50 54.75 55.00 55.25 55.50 55.75
##  [25] 56.00 56.25 56.50 56.75 57.00 57.25 57.50 57.75 58.00 58.25 58.50 58.75
##  [37] 59.00 59.25 59.50 59.75 60.00 60.25 60.50 60.75 61.00 61.25 61.50 61.75
##  [49] 62.00 62.25 62.50 62.75 63.00 63.25 63.50 63.75 64.00 64.25 64.50 64.75
##  [61] 65.00 65.25 65.50 65.75 66.00 66.25 66.50 66.75 67.00 67.25 67.50 67.75
##  [73] 68.00 68.25 68.50 68.75 69.00 69.25 69.50 69.75 70.00 70.25 70.50 70.75
##  [85] 71.00 71.25 71.50 71.75 72.00 72.25 72.50 72.75 73.00 73.25 73.50 73.75
##  [97] 74.00 74.25 74.50 74.75 75.00 75.25 75.50 75.75 76.00 76.25 76.50 76.75
## [109] 77.00 77.25 77.50 77.75 78.00 78.25 78.50 78.75 79.00 79.25 79.50 79.75
## [121] 80.00

 

Finding x values

# Evaluate the normal probability density function (mean 65, sd 4) at each
# of the x-values
dens <- dnorm(x, mean = 65, sd = 4)

# Draw the density curve. Base-graphics calls are written as separate
# statements: chaining them with `+` (ggplot2 style) only appeared to work
# because NULL + NULL evaluates to integer(0), which was then printed as
# stray output.
plot(
  x, dens,
  type = "l", main = "Normal dist for X: Mean=65, s=4",
  xlab = "x", ylab = "Probability density", las = 1
)

# Add a vertical reference line at the mean (mu = 65)
abline(v = 65)

 

Plotting the std normal density function

# First, let's define the sequence from -5 to +5 in steps of 0.1
x <- seq(from = -5, to = 5, by = 0.1)

# Checking the 101 x values (both endpoints are included)
print(x)
##   [1] -5.0 -4.9 -4.8 -4.7 -4.6 -4.5 -4.4 -4.3 -4.2 -4.1 -4.0 -3.9 -3.8 -3.7 -3.6
##  [16] -3.5 -3.4 -3.3 -3.2 -3.1 -3.0 -2.9 -2.8 -2.7 -2.6 -2.5 -2.4 -2.3 -2.2 -2.1
##  [31] -2.0 -1.9 -1.8 -1.7 -1.6 -1.5 -1.4 -1.3 -1.2 -1.1 -1.0 -0.9 -0.8 -0.7 -0.6
##  [46] -0.5 -0.4 -0.3 -0.2 -0.1  0.0  0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9
##  [61]  1.0  1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2.0  2.1  2.2  2.3  2.4
##  [76]  2.5  2.6  2.7  2.8  2.9  3.0  3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9
##  [91]  4.0  4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5.0
# Evaluate the standard normal density; dnorm() defaults to mean = 0, sd = 1
y <- dnorm(x)

# Draw the curve, then add the grid and a vertical line at the mean.
# Each base-graphics call is its own statement: joining them with `+` only
# ran by accident (NULL + NULL is integer(0)) and printed stray output.
plot(x, y, type = "l", lwd = 2, main = "Standard normal density function")
grid(col = "grey")
abline(v = 0)

 

 

Plotting density curve with sd lines

# Plot a normal density function with mean 100 and standard deviation 30
m <- 100 # mean
s <- 30 # standard deviation
x <- seq(from = m - 5 * s, to = m + 5 * s, by = 1) # x-values to be plotted
y <- dnorm(x, mean = m, sd = s) # normal density at each x-value

# The curve is N(100, 30), not the standard normal, so the title says so.
plot(
  x, y,
  type = "l", lwd = 2,
  main = "Normal density function (mean = 100, sd = 30)", las = 1
)

# Mark the mean, then 1, 2 and 3 standard deviations on either side of it.
# abline() accepts a vector of x-positions, so one call draws all six lines.
# The calls are separate statements rather than being chained with `+`,
# which only ran by accident and printed a stray integer(0).
abline(v = m, col = "darkgrey", lwd = 2)
abline(v = m + c(-3, -2, -1, 1, 2, 3) * s, col = "lightblue", lwd = 2)

 

 

The distribution function pnorm()

The pnorm() function can be used to calculate probabilities for a normal random variable:

 

Probability calculations

# P(X <= 52) for X with mean 55 and sd 3; lower.tail = TRUE gives P(X <= q).
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
pnorm(q = 52, mean = 55, sd = 3, lower.tail = TRUE)
## [1] 0.1586553
# Can also be written positionally (lower.tail defaults to TRUE):
pnorm(52, 55, 3)
## [1] 0.1586553
# P(X >= 60): lower.tail = FALSE returns the upper-tail probability
pnorm(60, 55, 3, lower.tail = FALSE)
## [1] 0.04779035

pnorm() can also be used to calculate probabilities for Z, the standard normal random variable (mean 0, standard deviation 1):

# P(Z >= 1.5) for the standard normal Z (mean 0, sd 1);
# lower.tail = FALSE returns the upper-tail probability
pnorm(q = 1.5, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 0.0668072
# The same call with positional arguments (q, mean, sd, lower.tail)
pnorm(1.5, 0, 1, FALSE)
## [1] 0.0668072

 

Plotting the cumulative distribution function

We can plot the shape of the normal cumulative distribution function (CDF). For example, it can return the probability for a random variable X to be less than a given x

# The probability that X <= 100, read off the cumulative distribution
# function of a normal variable with mean 100 and standard deviation 30
m <- 100
s <- 30
# Define the x-grid here so this chunk does not silently depend on an x
# left over from an earlier chunk
x <- seq(from = m - 5 * s, to = m + 5 * s, by = 1)
z <- pnorm(x, mean = m, sd = s)

# Draw the CDF. The distribution is N(100, 30), not the standard normal,
# so the title says so.
plot(
  x, z,
  type = "l", col = "blue", lwd = 2,
  main = "Normal distribution function (mean = 100, sd = 30)",
  las = 1, xlab = "X"
)

# Arrows from x = m up to the curve and across to the y-axis, showing that
# P(X <= m) = 0.5. Each annotation is its own statement: chaining them with
# `+` only ran by accident and printed a stray numeric(0).
arrows(m, 0, m, 0.5, length = 0.1, lwd = 2)
arrows(m, 0.5, min(x), 0.5, length = 0.1, lwd = 2)
# `labels` is spelled out in full rather than relying on partial matching
axis(2, at = 0.5, labels = 0.5, las = 1, col = "purple", lwd = 2)

With a mean equal to 100, there is a 50% probability of getting a value of 100 or less.

 

 

The quantile function qnorm()

The qnorm() function can be used to calculate quantiles or percentiles for a normal random variable. qnorm() is the inverse of the pnorm() function: it returns the x-value (quantile) for a given cumulative probability p.

# Find the first quartile (Q1): the x-value with 25% of the distribution
# at or below it. TRUE is spelled out because T can be reassigned.
qnorm(p = 0.25, mean = 55, sd = 3, lower.tail = TRUE)
## [1] 52.97653
# Find the third quartile (Q3); lower.tail = TRUE is the default
qnorm(p = 0.75, mean = 55, sd = 3)
## [1] 57.02347

 

Plotting the CDF highlighting quantile

# Plot the cumulative distribution function of a normal variable with
# mean 100 and standard deviation 30
m <- 100
s <- 30
# Define the x-grid here so this chunk does not silently depend on an x
# left over from an earlier chunk
x <- seq(from = m - 5 * s, to = m + 5 * s, by = 1)
z <- pnorm(x, mean = m, sd = s)
plot(
  x, z,
  type = "l", col = "darkblue", lwd = 2,
  main = "Normal distribution function (mean = 100, sd = 30)", las = 1
)

## Calculate the first quartile
p1 <- 0.25
q1 <- qnorm(p1, mean = m, sd = s)
print(q1)
## [1] 79.76531
# Plotting the first quartile: redraw the curve, then annotate it
plot(
  x, z,
  type = "l", col = "darkblue", lwd = 2,
  main = "Normal distribution function (mean = 100, sd = 30)", las = 1
)
# Arrows from probability p1 on the y-axis across to the curve and down to
# q1 on the x-axis. Each annotation is its own statement: chaining them
# with `+` only ran by accident and printed a stray numeric(0).
arrows(min(x), p1, q1, p1, col = "darkgreen", length = 0.1)
arrows(q1, p1, q1, 0, col = "darkgreen", length = 0.1)
# `labels` is spelled out in full rather than relying on partial matching
axis(1, at = q1, labels = "Q1", las = 1)
axis(2, at = p1, labels = p1, las = 1, col = "darkgreen")

 

 

Random generation with rnorm()

The rnorm function can be used to draw a random sample from a normally distributed population

 

Simulating samples

# Draw a small simple random sample (30 values) from a normal population
# with mean 55 and standard deviation 3, and show its histogram
rand30 <- rnorm(n = 30, mean = 55, sd = 3)
hist(rand30)

# Draw a large simple random sample (10,000 values) from the same
# population and show its histogram
rand10000 <- rnorm(10000, 55, 3)
hist(rand10000)

Histogram with fitted curve

# Specifying mean and standard deviation
m <- 100
s <- 30

# Generating 10000 random values
n <- 10000
r <- rnorm(n, mean = m, sd = s)

# hist() draws the histogram and returns its bin data, which is kept in h
h <- hist(r, breaks = 50, col = "grey")

## Print the names of the components of the histogram object
names(h)
## [1] "breaks"   "counts"   "density"  "mids"     "xname"    "equidist"
# Plot the sample density at the bin midpoints, then overlay the
# theoretical normal density curve. plot() and lines() are separate
# statements: chaining them with `+` only ran by accident (NULL + NULL is
# integer(0)) and printed stray output.
plot(
  h$mids, h$density,
  type = "h", lwd = 4, col = "darkgrey", main = "random values"
)
lines(h$mids, dnorm(h$mids, mean = m, sd = s), lwd = 2, col = "darkred")

 

 

Learning R programming

This page is mainly based on Jacques van Helden’s page on http://pedagogix-tagc.univ-mrs.fr/ and on MarinStatsLectures with Mike Marin.