This is part of my training for intro to R programming which I publish on my site https://dataz4s.com/. In this document, I will be working with the functions: pnorm(), qnorm(), dnorm() and rnorm().
The dnorm function can be used to find and/or plot the probability density function.
Creating and plotting density of sequence from 50 to 80
# Build the grid of x-values (50 to 80 in steps of 0.25) and display it
x <- seq(50, 80, by = 0.25)
print(x)
## [1] 50.00 50.25 50.50 50.75 51.00 51.25 51.50 51.75 52.00 52.25 52.50 52.75
## [13] 53.00 53.25 53.50 53.75 54.00 54.25 54.50 54.75 55.00 55.25 55.50 55.75
## [25] 56.00 56.25 56.50 56.75 57.00 57.25 57.50 57.75 58.00 58.25 58.50 58.75
## [37] 59.00 59.25 59.50 59.75 60.00 60.25 60.50 60.75 61.00 61.25 61.50 61.75
## [49] 62.00 62.25 62.50 62.75 63.00 63.25 63.50 63.75 64.00 64.25 64.50 64.75
## [61] 65.00 65.25 65.50 65.75 66.00 66.25 66.50 66.75 67.00 67.25 67.50 67.75
## [73] 68.00 68.25 68.50 68.75 69.00 69.25 69.50 69.75 70.00 70.25 70.50 70.75
## [85] 71.00 71.25 71.50 71.75 72.00 72.25 72.50 72.75 73.00 73.25 73.50 73.75
## [97] 74.00 74.25 74.50 74.75 75.00 75.25 75.50 75.75 76.00 76.25 76.50 76.75
## [109] 77.00 77.25 77.50 77.75 78.00 78.25 78.50 78.75 79.00 79.25 79.50 79.75
## [121] 80.00
# Find the value of the probability density function (mean 65, sd 4) for each
# of these x-values
dens <- dnorm(x, mean = 65, sd = 4)
# Draw the density curve. Base-graphics functions draw as a side effect, so
# each call is its own statement; they are not combined with `+`.
plot(x, dens,
  type = "l", main = "Normal dist for X: Mean=65, s=4",
  xlab = "x", ylab = "Probability density", las = 1
)
# Add a vertical line at our mu (the mean, 65) with abline()
abline(v = 65)
# Grid for the standard normal curve: -5 to +5 in steps of 0.1 (101 values)
x <- seq(-5, 5, by = 0.1)
# Display the grid
x
## [1] -5.0 -4.9 -4.8 -4.7 -4.6 -4.5 -4.4 -4.3 -4.2 -4.1 -4.0 -3.9 -3.8 -3.7 -3.6
## [16] -3.5 -3.4 -3.3 -3.2 -3.1 -3.0 -2.9 -2.8 -2.7 -2.6 -2.5 -2.4 -2.3 -2.2 -2.1
## [31] -2.0 -1.9 -1.8 -1.7 -1.6 -1.5 -1.4 -1.3 -1.2 -1.1 -1.0 -0.9 -0.8 -0.7 -0.6
## [46] -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9
## [61] 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4
## [76] 2.5 2.6 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9
## [91] 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0
# Plot the standard normal density function (mean = 0, sd = 1).
# dnorm() defaults to mean = 0 and sd = 1 when they are not supplied.
y <- dnorm(x)
# Base-graphics calls draw as a side effect, so each one is its own statement
# rather than being chained with `+`.
plot(x, y, type = "l", lwd = 2, main = "Standard normal density function")
grid(col = "grey")
# Vertical line at the mean
abline(v = 0)
# Plot a normal density function with mean 100 and standard deviation 30
m <- 100 # mean
s <- 30 # standard deviation
x <- seq(from = m - 5 * s, to = m + 5 * s, by = 1) # x-values to be plotted
y <- dnorm(x, mean = m, sd = s) # normal density at each x-value
# Base-graphics calls draw as a side effect, so each one is its own statement
# rather than being chained with `+`.
plot(x, y,
  type = "l", lwd = 2, las = 1,
  main = "Normal density function (mean=100, sd=30)"
)
# Vertical line at the mean
abline(v = m, col = "darkgrey", lwd = 2)
# Vertical lines at 1, 2 and 3 standard deviations either side of the mean;
# abline() accepts a vector of positions, so one call draws all six.
abline(v = m + c(-3, -2, -1, 1, 2, 3) * s, col = "lightblue", lwd = 2)
The pnorm function can be used to calculate probabilities for a normal random variable:
# P(X <= 52) for X ~ N(mean = 55, sd = 3)
pnorm(q = 52, mean = 55, sd = 3, lower.tail = TRUE)
## [1] 0.1586553
# Can also be written with positional arguments (lower.tail defaults to TRUE):
pnorm(52, 55, 3)
## [1] 0.1586553
# P(X >= 60): lower.tail = FALSE returns the upper-tail probability
pnorm(60, 55, 3, lower.tail = FALSE)
## [1] 0.04779035
pnorm can also be used to calculate probabilities for Z, the standard normal variable (mean 0, standard deviation 1):
# P(Z >= 1.5) for the standard normal variable Z
pnorm(q = 1.5, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 0.0668072
# The same call with positional arguments (the fourth one is lower.tail)
pnorm(1.5, 0, 1, FALSE)
## [1] 0.0668072
We can plot the shape of the normal cumulative distribution function (CDF). For a given x, it returns the probability that a random variable X is less than or equal to x.
# The probability that X <= 100, read off the cumulative distribution function.
# x is the grid of x-values created for the previous density plot.
m <- 100
s <- 30
z <- pnorm(x, mean = m, sd = s)
# Base-graphics calls draw as a side effect, so each one is its own statement
# rather than being chained with `+`.
plot(x, z,
  type = "l", col = "blue", lwd = 2, las = 1, xlab = "X",
  main = "Normal cumulative distribution function (mean=100, sd=30)"
)
# Arrow from the x-axis at the mean up to height 0.5, then left to the
# smallest x-value at that height
arrows(m, 0, m, 0.5, length = 0.1, lwd = 2)
arrows(m, 0.5, min(x), 0.5, length = 0.1, lwd = 2)
# Mark the probability 0.5 on the y-axis
axis(2, at = 0.5, labels = 0.5, las = 1, col = "purple", lwd = 2)
With a mean equal to 100, there is a 50% probability of getting 100 or less.
The qnorm function can be used to calculate quantiles or percentiles for a normal random variable. qnorm() is the inverse of the pnorm() function: it returns the x value (the quantile) for a given cumulative probability p.
# Find the first quartile (Q1): the x-value with 25% of the probability at or
# below it, for X ~ N(mean = 55, sd = 3)
qnorm(p = 0.25, mean = 55, sd = 3, lower.tail = TRUE)
## [1] 52.97653
# Find the third quartile (Q3); lower.tail defaults to TRUE
qnorm(p = 0.75, mean = 55, sd = 3)
## [1] 57.02347
# Plot the cumulative distribution function of a normal variable with mean 100
# and standard deviation 30.
# x is the grid of x-values created for the earlier density plot.
m <- 100
s <- 30
z <- pnorm(x, mean = m, sd = s)
plot(x, z,
  type = "l", col = "darkblue", lwd = 2, las = 1,
  main = "Normal cumulative distribution function (mean=100, sd=30)"
)
# Calculate the first quartile
p1 <- 0.25
q1 <- qnorm(p1, mean = m, sd = s)
print(q1)
## [1] 79.76531
# Plot the cumulative distribution function and mark the first quartile on it.
# Base-graphics calls draw as a side effect, so each one is its own statement
# rather than being chained with `+`.
plot(x, z,
  type = "l", col = "darkblue", lwd = 2, las = 1,
  main = "Normal cumulative distribution function (mean=100, sd=30)"
)
# Arrow from the smallest x-value across to q1 at height p1, then down to the
# x-axis
arrows(min(x), p1, q1, p1, col = "darkgreen", length = 0.1)
arrows(q1, p1, q1, 0, col = "darkgreen", length = 0.1)
# Label the quartile on the x-axis and its probability on the y-axis
axis(1, at = q1, labels = "Q1", las = 1)
axis(2, at = p1, labels = p1, las = 1, col = "darkgreen")
The rnorm function can be used to draw a random sample from a normally distributed population
# Draw a simple random sample of 30 values from a normal population with
# mean 55 and standard deviation 3, and show its histogram
rand30 <- rnorm(n = 30, mean = 55, sd = 3)
hist(rand30)
# Repeat with a simple random sample of 10,000 values
rand10000 <- rnorm(10000, 55, 3)
hist(rand10000)
# Sample size, mean and standard deviation for the simulation
n <- 10000
m <- 100
s <- 30
# Draw n random values from the normal distribution with mean m and sd s
r <- rnorm(n, mean = m, sd = s)
# hist() plots the histogram and returns an object that we keep in h
h <- hist(r, col = "grey", breaks = 50)
# List the names of the components stored in the histogram object
names(h)
## [1] "breaks" "counts" "density" "mids" "xname" "equidist"
# Plot the density of the random sample as vertical bars at the histogram
# bin midpoints. Base-graphics calls draw as a side effect, so each one is its
# own statement rather than being chained with `+`.
plot(h$mids, h$density,
  type = "h", lwd = 4, col = "darkgrey", main = "random values"
)
# Overlay the normal density curve evaluated at the same midpoints
lines(h$mids, dnorm(h$mids, mean = m, sd = s), lwd = 2, col = "darkred")
This page is mainly based on Jacques van Helden’s page on http://pedagogix-tagc.univ-mrs.fr/ and on MarinStatsLectures with Mike Marin.