First, let’s set the working directory and load the required packages.
# Make the presentation folder the working directory; its location under the
# home directory differs between Windows and every other operating system.
setwd(
  if (Sys.info()[["sysname"]] == "Windows") {
    "~/Masters/DATA606/Presentation"
  } else {
    "~/Documents/Masters/DATA606/Presentation"
  }
)
require(ggplot2)
## Loading required package: ggplot2
library(DATA606)
##
## Welcome to CUNY DATA606 Statistics and Probability for Data Analytics
## This package is designed to support this course. The text book used
## is OpenIntro Statistics, 3rd Edition. You can read this by typing
## vignette('os3') or visit www.OpenIntro.org.
##
## The getLabs() function will return a list of the labs available.
##
## The demo(package='DATA606') will list the demos that are available.
##
## Attaching package: 'DATA606'
## The following object is masked from 'package:utils':
##
## demo
For each problem, I have used the suggested normalPlot() function, as well as ggplot(), to show how these answers can be produced in different ways.
Answer:
The percent is:
# Area under the standard normal curve to the left of Z = -1.35.
pnorm(-1.35, mean = 0, sd = 1, lower.tail = TRUE)
## [1] 0.08850799
Using normalPlot()
# Draw the lower-tail region with the normalPlot() helper.
# NOTE(review): `bounds` holds a single value while tails = TRUE; presumably
# normalPlot() then shades only the tail below -1.35 -- confirm against the
# helper's implementation, since a second bound is not supplied.
normalPlot(mean = 0, sd = 1, bounds = c(-1.35), tails = TRUE)
Using ggplot()
# Q1: shade the lower tail of the standard normal density, from the left
# edge of the plot up to Z = -1.35, and mark the cut-off with a labelled line.
lb <- -4          # left edge of the plotted Z range
ub <- 4           # right edge of the plotted Z range
z1 <- lb          # shaded region starts at the left edge (-4)
z2 <- -1.35       # shaded region ends at the cut-off
pick_line1 <- z2  # the only boundary that needs a marker

# The marker is drawn once. The label uses annotate() rather than
# geom_text(aes(...)): annotate() evaluates its arguments immediately and adds
# exactly one label, whereas aes() on constants is evaluated when the plot is
# drawn (picking up whatever pick_line1 holds by then) and draws the label once
# per row of the plot data.
Q1 <- ggplot(data.frame(x = c(lb, ub)), aes(x)) +
  stat_function(fun = dnorm) +
  stat_function(fun = dnorm, xlim = c(z1, z2), geom = "area", alpha = 0.5) +
  geom_vline(xintercept = pick_line1, color = "black", alpha = 0.75) +
  annotate(
    "text",
    x = pick_line1, y = 0.25,
    # The trailing newline keeps the rotated label off the vertical line.
    label = sprintf("Z = %s\n", pick_line1),
    color = "black", angle = 90
  ) +
  labs(x = "Z - Value")
Q1
Answer:
The percent is:
# Area under the standard normal curve to the right of Z = 1.48.
pnorm(1.48, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 0.06943662
Using normalPlot()
# The finite upper bound 4 stands in for +Inf. Assuming normalPlot() shades
# the interval between the two bounds (TODO confirm), the region (1.48, 4)
# omits P(Z > 4), roughly 3e-05, relative to the exact upper tail.
normalPlot(mean = 0, sd = 1, bounds = c(1.48, 4))
Using ggplot()
# Q2: shade the upper tail of the standard normal density, from Z = 1.48 to
# the right edge of the plot, and mark the cut-off with a labelled line.
lb <- -4          # left edge of the plotted Z range
ub <- 4           # right edge of the plotted Z range
z1 <- 1.48        # shaded region starts at the cut-off
z2 <- ub          # shaded region runs to the right edge
pick_line1 <- z1  # the only boundary that needs a marker

# The marker is drawn once. The label uses annotate() rather than
# geom_text(aes(...)): annotate() evaluates its arguments immediately and adds
# exactly one label, whereas aes() on constants is evaluated when the plot is
# drawn (picking up whatever pick_line1 holds by then) and draws the label once
# per row of the plot data.
Q2 <- ggplot(data.frame(x = c(lb, ub)), aes(x)) +
  stat_function(fun = dnorm) +
  stat_function(fun = dnorm, xlim = c(z1, z2), geom = "area", alpha = 0.5) +
  geom_vline(xintercept = pick_line1, color = "black", alpha = 0.75) +
  annotate(
    "text",
    x = pick_line1, y = 0.25,
    # The trailing newline keeps the rotated label off the vertical line.
    label = sprintf("Z = %s\n", pick_line1),
    color = "black", angle = 90
  ) +
  labs(x = "Z - Value")
Q2
Answer:
The percent is:
# P(-0.4 < Z < 1.5): evaluate the CDF at both bounds in one vectorised call
# and take upper minus lower.
diff(pnorm(q = c(-0.4, 1.5), mean = 0, sd = 1))
## [1] 0.5886145
Using normalPlot()
# Plot the region bounded by Z = -0.4 and Z = 1.5. Presumably normalPlot()
# shades the interval between the two bounds when `tails` is left at its
# default -- TODO confirm against the helper's documentation.
normalPlot(mean = 0, sd = 1, bounds = c(-0.4, 1.5))
Using ggplot()
# Q3: shade the central region -0.4 < Z < 1.5 of the standard normal density
# and mark both boundaries with labelled lines.
lb <- -4          # left edge of the plotted Z range
ub <- 4           # right edge of the plotted Z range
z1 <- -0.4        # lower boundary of the shaded region
z2 <- 1.5         # upper boundary of the shaded region
pick_line1 <- z1  # first marker position
pick_line2 <- z2  # second marker position

# Both markers are added with one vectorised geom_vline() and one annotate().
# annotate() is used instead of geom_text(aes(...)) because it evaluates its
# arguments immediately and adds exactly one label per position, whereas aes()
# on constants is evaluated when the plot is drawn (picking up whatever the
# pick_line variables hold by then) and draws each label once per row of the
# plot data.
Q3 <- ggplot(data.frame(x = c(lb, ub)), aes(x)) +
  stat_function(fun = dnorm) +
  stat_function(fun = dnorm, xlim = c(z1, z2), geom = "area", alpha = 0.5) +
  geom_vline(
    xintercept = c(pick_line1, pick_line2),
    color = "black", alpha = 0.75
  ) +
  annotate(
    "text",
    x = c(pick_line1, pick_line2), y = 0.25,
    # The trailing newline keeps each rotated label off its vertical line.
    label = sprintf("Z = %s\n", c(pick_line1, pick_line2)),
    color = "black", angle = 90
  ) +
  labs(x = "Z - Value")
Q3
The percent is:
# P(|Z| > 2): lower tail below -2 plus upper tail above 2.
pnorm(q = -2, mean = 0, sd = 1) +
  pnorm(q = 2, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 0.04550026
Also, because the distribution is symmetric, the following method provides the same result.
# By symmetry, both tails beyond |Z| = 2 together are twice the upper tail.
2 * pnorm(q = 2, mean = 0, sd = 1, lower.tail = FALSE)
## [1] 0.04550026
Using normalPlot()
# Two bounds with tails = TRUE: presumably normalPlot() shades both outer
# tails, below -2 and above 2 -- confirm against the helper's documentation.
normalPlot(mean = 0, sd = 1, bounds = c(-2, 2), tails = TRUE)
Using ggplot()
# Q4: shade both outer tails of the standard normal density (Z < -2 and
# Z > 2) and mark both cut-offs with labelled lines.
lb <- -4          # left edge of the plotted Z range
ub <- 4           # right edge of the plotted Z range
z1 <- -2          # lower tail ends here
z2 <- 2           # upper tail starts here
pick_line1 <- z1  # first marker position
pick_line2 <- z2  # second marker position

# Both markers are added with one vectorised geom_vline() and one annotate().
# annotate() is used instead of geom_text(aes(...)) because it evaluates its
# arguments immediately and adds exactly one label per position, whereas aes()
# on constants is evaluated when the plot is drawn (picking up whatever the
# pick_line variables hold by then) and draws each label once per row of the
# plot data.
# The axis title is part of the stored object, so Q4 is complete on its own.
Q4 <- ggplot(data.frame(x = c(lb, ub)), aes(x)) +
  stat_function(fun = dnorm) +
  stat_function(fun = dnorm, xlim = c(lb, z1), geom = "area", alpha = 0.5) +
  stat_function(fun = dnorm, xlim = c(z2, ub), geom = "area", alpha = 0.5) +
  geom_vline(
    xintercept = c(pick_line1, pick_line2),
    color = "black", alpha = 0.75
  ) +
  annotate(
    "text",
    x = c(pick_line1, pick_line2), y = 0.25,
    # The trailing newline keeps each rotated label off its vertical line.
    label = sprintf("Z = %s\n", c(pick_line1, pick_line2)),
    color = "black", angle = 90
  ) +
  labs(x = "Z - Value")
Q4
Each ggplot() above was built in a single statement; however, this is not necessary. For example, if we know that a range of -4 < Z < 4 will always be sufficient, we can set up a base graph once, store it in a variable, and then add the remaining layers to it:
# Reusable base graph: the standard normal density over -4 < Z < 4.
g <- ggplot(data.frame(x = c(-4, 4)), aes(x)) +
  stat_function(fun = dnorm)

# Region to shade and the boundaries to mark.
z1 <- -0.4
z2 <- 1.5
pick_line1 <- z1
pick_line2 <- z2

# Add the shaded area, both boundary markers and the axis title to the base
# graph. The axis title is set once. annotate() is used instead of
# geom_text(aes(...)) because it evaluates its arguments immediately and adds
# exactly one label per position, whereas aes() on constants is evaluated when
# the plot is drawn and draws each label once per row of the plot data.
gplot <- g +
  stat_function(fun = dnorm, xlim = c(z1, z2), geom = "area", alpha = 0.5) +
  geom_vline(
    xintercept = c(pick_line1, pick_line2),
    color = "black", alpha = 0.75
  ) +
  annotate(
    "text",
    x = c(pick_line1, pick_line2), y = 0.25,
    # The trailing newline keeps each rotated label off its vertical line.
    label = sprintf("Z = %s\n", c(pick_line1, pick_line2)),
    color = "black", angle = 90
  ) +
  labs(x = "Z - Value")
gplot