# Install tidyverse only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", repos = "https://cloud.r-project.org")
}
##
## The downloaded binary packages are in
## /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdBnz7E/downloaded_packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Load the Challenger launch data from the CSV file in the working directory.
mydata <- read_csv(file = "challenger-1.csv")
## Rows: 23 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): incident
## dbl (3): launch, temp, o_ring_probs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the tibble (the first rows are printed).
print(mydata)
## # A tibble: 23 × 4
## launch temp incident o_ring_probs
## <dbl> <dbl> <chr> <dbl>
## 1 1 53.6 Yes 3
## 2 2 57.2 Yes 1
## 3 3 57.2 Yes 1
## 4 4 62.6 Yes 1
## 5 5 66.2 No 0
## 6 6 66.2 No 0
## 7 7 66.2 No 0
## 8 8 66.2 No 0
## 9 9 66.2 No 0
## 10 10 68 No 0
## # … with 13 more rows
# First six rows of the data.
head(mydata, n = 6)
## # A tibble: 6 × 4
## launch temp incident o_ring_probs
## <dbl> <dbl> <chr> <dbl>
## 1 1 53.6 Yes 3
## 2 2 57.2 Yes 1
## 3 3 57.2 Yes 1
## 4 4 62.6 Yes 1
## 5 5 66.2 No 0
## 6 6 66.2 No 0
# Last six rows of the data.
tail(mydata, n = 6)
## # A tibble: 6 × 4
## launch temp incident o_ring_probs
## <dbl> <dbl> <chr> <dbl>
## 1 18 75.2 Yes 2
## 2 19 75.2 No 0
## 3 20 75.2 No 0
## 4 21 78.8 No 0
## 5 22 78.8 No 0
## 6 23 80.6 No 0
# Install psych only when it is missing. The package name must be spelled
# "psych"; a misspelled name makes install.packages() warn and install nothing.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych", repos = "https://cloud.r-project.org")
}
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Compact view of the column types and first values.
str(object = mydata)
## spc_tbl_ [23 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ launch : num [1:23] 1 2 3 4 5 6 7 8 9 10 ...
## $ temp : num [1:23] 53.6 57.2 57.2 62.6 66.2 66.2 66.2 66.2 66.2 68 ...
## $ incident : chr [1:23] "Yes" "Yes" "Yes" "Yes" ...
## $ o_ring_probs: num [1:23] 3 1 1 1 0 0 0 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. launch = col_double(),
## .. temp = col_double(),
## .. incident = col_character(),
## .. o_ring_probs = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary statistics.
summary(object = mydata)
## launch temp incident o_ring_probs
## Min. : 1.0 Min. :53.60 Length:23 Min. :0.0000
## 1st Qu.: 6.5 1st Qu.:66.20 Class :character 1st Qu.:0.0000
## Median :12.0 Median :69.80 Mode :character Median :0.0000
## Mean :12.0 Mean :69.02 Mean :0.4348
## 3rd Qu.:17.5 3rd Qu.:74.30 3rd Qu.:1.0000
## Max. :23.0 Max. :80.60 Max. :3.0000
# Descriptive statistics for each numeric column, one describe() call each.
describe(mydata[["launch"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 23 12 6.78 12 12 8.9 1 23 22 0 -1.36 1.41
describe(mydata[["temp"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 23 69.02 6.97 69.8 69.33 5.34 53.6 80.6 27 -0.4 -0.44 1.45
describe(mydata[["o_ring_probs"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 23 0.43 0.79 0 0.26 0 0 3 3 1.81 2.69 0.16
The variable temp (temperature) is interval level because you can categorize, rank, and infer equal intervals between values, but there is no true zero.
The variable launch is ordinal level because you can categorize and rank the data.
The variable o_ring_probs is ratio level because you can categorize, rank, and infer equal intervals, and there is a true zero.
The variable incident is nominal level because you can only categorize the data by labelling.
?hist
# Histogram of the o_ring_probs column with a custom title, label and colour.
hist(
  x = mydata[["o_ring_probs"]],
  col = "purple",
  xlab = "O Ring Probs",
  main = "Histogram"
)
## 1- d)
# Horizontal box plot of temp grouped by incident.
# The formula is resolved against `mydata` via `data =` rather than wrapping
# `$`-extracted columns in formula(); TRUE/FALSE are spelled out because
# T and F are ordinary variables that can be reassigned.
boxplot(
  temp ~ incident,
  data = mydata,
  notch = FALSE,
  horizontal = TRUE,
  main = "Box Plot",
  xlab = "Temperature",
  ylab = "Incident",
  col = c("pink", "purple")
)
# Row indices that would sort the data by temp in ascending order.
flaunch <- order(mydata[["temp"]])
print(flaunch)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
# Count the launches with temp of at least 65. Summing a logical vector
# counts its TRUE values; length() would return the number of rows (23)
# no matter how many comparisons are TRUE.
sum(mydata$temp >= 65)
## [1] 19
# Inputs
# Probability of telling the truth
a <- 0.8
# Probability of not telling the truth
a2 <- 0.2
# Rate of detecting an actual truth teller
b <- 0.90
# Rate of detecting an actual liar
b2 <- 0.59
# Complements of the two detection rates
notbgivena <- 1 - b
notbgivena2 <- 1 - b2
# Joint probabilities: each prior multiplied by a detection rate or complement
ab <- a * b
abp <- a * notbgivena
ab2 <- a2 * b2
ab2p <- a2 * notbgivena2
# Ratio of the liar-and-detected joint to the sum of both "detected" joints.
# NOTE(review): the denominator adds ab (truth teller detected as truthful),
# not abp (truth teller flagged as liar). If the question asks for
# P(liar | flagged as liar), the denominator should be ab2 + abp - confirm
# against the problem statement.
bt <- ab2 / (ab + ab2)
print(bt)
## [1] 0.1408115
print(bt * 100)
## [1] 14.08115
# Product of the two prior probabilities a and a2.
result <- a * a2
print(result)
## [1] 0.16
# Product of 0.59 and 0.41.
result2 <- 0.41 * 0.59
print(result2)
## [1] 0.2419
# Model as Poisson
# Number of trials
na <- 9
# Probability of success. Stored as pi3a (same naming scheme as pi3b and pi4)
# instead of `pi`, so that R's built-in constant pi is not masked.
pi3a <- 0.5
# Poisson rate: lambda = n * p
lambda3a <- na * pi3a
# The standard deviation of a Poisson distribution is sqrt(lambda)
sd3a <- sqrt(lambda3a)
sd3a
## [1] 2.12132
# Poisson probability mass at x = na with rate lambda3a
dpois(na, lambda3a)
## [1] 0.02316458
# Model as Binomial
# Number of trials and probability of success
n3b <- 8
pi3b <- 0.5
# P(X = 0) for X ~ Binomial(n3b, pi3b); arguments are named in full and use
# the variables above rather than repeated literals.
result3b <- dbinom(x = 0, size = n3b, prob = pi3b)
result3b
## [1] 0.00390625
# Standard deviation: square root of the binomial variance n * p * (1 - p).
# Without sqrt() this value would be the variance, not the standard deviation.
sd3b <- sqrt(n3b * pi3b * (1 - pi3b))
sd3b
## [1] 1.414214
# Expected value: n * p
ev <- n3b * pi3b
ev
## [1] 4
# 0.75 is the probability of a wrong result, 0.25 of a right result.
# Product for two wrong results and one right result.
result4a <- 0.25 * 0.75^2
print(result4a)
## [1] 0.140625
# Binomial
# Target: P(3 <= X <= 4 | n = 5, pi = 0.25)
n4 <- 5
pi4 <- 0.25
x4 <- 3:4
# Add the point probabilities at each value in x4.
result4b <- sum(dbinom(x = x4, size = n4, prob = pi4))
print(result4b)
## [1] 0.1025391
Q4- C)
# Upper-tail binomial probability beyond x4c, reusing n4 and pi4.
x4c <- 2.5
result4c <- 1 - pbinom(q = x4c, size = n4, prob = pi4, lower.tail = TRUE)
print(result4c)
## [1] 0.1035156
# Cross-check: request the upper tail directly.
result4c2 <- pbinom(q = x4c, size = n4, prob = pi4, lower.tail = FALSE)
print(result4c2)
## [1] 0.1035156
# Normal distribution parameters shared by the three probabilities below
mean5a <- 72.6
sd5a <- 4.78
# P(X < 80 | mean = 72.6, sd = 4.78)
result5a1 <- pnorm(q = 80, mean = mean5a, sd = sd5a)
result5a1
## [1] 0.939203
# P(68 < X < 78 | mean = 72.6, sd = 4.78)
result5a2 <- pnorm(q = 78, mean = mean5a, sd = sd5a) -
  pnorm(q = 68, mean = mean5a, sd = sd5a)
result5a2
## [1] 0.7027615
# P(X > 70 | mean = 72.6, sd = 4.78)
# The normal distribution is continuous, so no discrete adjustment is applied;
# the upper tail is requested directly with lower.tail = FALSE.
result5a3 <- pnorm(q = 70, mean = mean5a, sd = sd5a, lower.tail = FALSE)
result5a3
## [1] 0.7067562
# Normal parameters for this part
mean5ba <- 4313
sd5ba <- 583
# Value below which 5% of the distribution lies (5th percentile)
result5ba <- qnorm(p = 0.05, mean = mean5ba, sd = sd5ba)
print(result5ba)
## [1] 3354.05
# Normal parameters for this part
mean5b2 <- 5261
sd5b2 <- 807
# Value below which 90% of the distribution lies (90th percentile)
result5b2 <- qnorm(p = 0.9, mean = mean5b2, sd = sd5b2)
print(result5b2)
## [1] 6295.212