Q1

install.packages("tidyverse", repos = "https://cloud.r-project.org" )

## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpdBnz7E/downloaded_packages

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

# Read the launch records from the CSV file in the working directory.
mydata <- read_csv(file = "challenger-1.csv")

## Rows: 23 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): incident
## dbl (3): launch, temp, o_ring_probs
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the tibble; only the first 10 of its 23 rows are printed.
print(mydata)

## # A tibble: 23 × 4
##    launch  temp incident o_ring_probs
##     <dbl> <dbl> <chr>           <dbl>
##  1      1  53.6 Yes                 3
##  2      2  57.2 Yes                 1
##  3      3  57.2 Yes                 1
##  4      4  62.6 Yes                 1
##  5      5  66.2 No                  0
##  6      6  66.2 No                  0
##  7      7  66.2 No                  0
##  8      8  66.2 No                  0
##  9      9  66.2 No                  0
## 10     10  68   No                  0
## # … with 13 more rows

# First six rows.
head(mydata, n = 6L)

## # A tibble: 6 × 4
##   launch  temp incident o_ring_probs
##    <dbl> <dbl> <chr>           <dbl>
## 1      1  53.6 Yes                 3
## 2      2  57.2 Yes                 1
## 3      3  57.2 Yes                 1
## 4      4  62.6 Yes                 1
## 5      5  66.2 No                  0
## 6      6  66.2 No                  0

# Last six rows.
tail(mydata, n = 6L)

## # A tibble: 6 × 4
##   launch  temp incident o_ring_probs
##    <dbl> <dbl> <chr>           <dbl>
## 1     18  75.2 Yes                 2
## 2     19  75.2 No                  0
## 3     20  75.2 No                  0
## 4     21  78.8 No                  0
## 5     22  78.8 No                  0
## 6     23  80.6 No                  0

# Install psych only when it is not already available. The package name was
# previously misspelled as "pscyh", which CRAN reports as unavailable, so this
# step never installed anything.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych", repos = "https://cloud.r-project.org")
}

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

1-A)

# Structure of the data: column names, storage types, and leading values.
utils::str(mydata)

## spc_tbl_ [23 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ launch      : num [1:23] 1 2 3 4 5 6 7 8 9 10 ...
##  $ temp        : num [1:23] 53.6 57.2 57.2 62.6 66.2 66.2 66.2 66.2 66.2 68 ...
##  $ incident    : chr [1:23] "Yes" "Yes" "Yes" "Yes" ...
##  $ o_ring_probs: num [1:23] 3 1 1 1 0 0 0 0 0 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   launch = col_double(),
##   ..   temp = col_double(),
##   ..   incident = col_character(),
##   ..   o_ring_probs = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Per-column summary: quartiles and mean for the numeric columns, length and
# class for the character column.
summary(object = mydata)

##      launch          temp         incident          o_ring_probs   
##  Min.   : 1.0   Min.   :53.60   Length:23          Min.   :0.0000  
##  1st Qu.: 6.5   1st Qu.:66.20   Class :character   1st Qu.:0.0000  
##  Median :12.0   Median :69.80   Mode  :character   Median :0.0000  
##  Mean   :12.0   Mean   :69.02                      Mean   :0.4348  
##  3rd Qu.:17.5   3rd Qu.:74.30                      3rd Qu.:1.0000  
##  Max.   :23.0   Max.   :80.60                      Max.   :3.0000

# Descriptive statistics (psych::describe) for each numeric column in turn.
describe(mydata[["launch"]])

##    vars  n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 23   12 6.78     12      12 8.9   1  23    22    0    -1.36 1.41

describe(mydata[["temp"]])

##    vars  n  mean   sd median trimmed  mad  min  max range skew kurtosis   se
## X1    1 23 69.02 6.97   69.8   69.33 5.34 53.6 80.6    27 -0.4    -0.44 1.45

describe(mydata[["o_ring_probs"]])

##    vars  n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 23 0.43 0.79      0    0.26   0   0   3     3 1.81     2.69 0.16

1- b)

The variable temperature is interval level because its values can be categorized, ranked, and separated by equal intervals, but degrees Fahrenheit have no true zero.

The variable launch is ordinal level because its values can be categorized and ranked.

The variable o_ring_probs is ratio level because its values can be categorized and ranked, have equal intervals, and have a true zero.

The variable incident is nominal level because its values can only be categorized by label.

1- C)

# Open the help page for hist().
?hist
# Histogram of the o_ring_probs column with an explicit title, x-axis label,
# and bar colour.
with(
  mydata,
  hist(o_ring_probs, col = "purple", xlab = "O Ring Probs", main = "Histogram")
)

## 1- d)

# Horizontal box plots of launch temperature, one box per incident value.
# The x-axis label is spelled "Temperature" (it previously read "Tempeture"),
# `FALSE` is written out because `F` is an ordinary variable that can be
# reassigned, and the columns are looked up through `data = mydata`.
boxplot(
  temp ~ incident,
  data = mydata,
  notch = FALSE,
  horizontal = TRUE,          # temperature runs along the x-axis
  main = "Box Plot",
  xlab = "Temperature",
  ylab = "Incident",
  col = c("pink", "purple")   # one colour per box
)

If the temperature on the day of the launch is 36 degrees Fahrenheit, this is a cause for concern: as the box plot shows, every launch at a temperature below 62 degrees Fahrenheit resulted in an incident, and 36 degrees is far colder than any launch in the data.

1- E)

# order() returns the row positions that would sort temp in ascending order,
# not the sorted rows themselves.
flaunch <- with(mydata, order(temp))
print(flaunch)

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23

Looking at the table, we can see that the first successful launch occurred on launch number 5, at a temperature of 66.2.

1- F)

# Count the launches with a temperature of at least 65. Summing a logical
# vector counts its TRUE values; length() returned the number of rows (23)
# no matter what the condition was.
sum(mydata$temp >= 65)

## [1] 19

Q2- A)

# Known quantities ----

# Prior probabilities: telling the truth, and not telling the truth.
a <- 0.8
a2 <- 0.2

# Polygraph accuracy: detecting an actual truth teller, and detecting an
# actual liar.
b <- 0.90
b2 <- 0.59

# Derived quantities ----

# Complements of the two detection rates.
notbgivena <- 1 - b
notbgivena2 <- 1 - b2

# Joint probabilities: each prior times the matching conditional rate.
ab <- a * b                # truth teller, detected as such
abp <- a * notbgivena      # truth teller, not detected as such
ab2 <- a2 * b2             # liar, detected as such
ab2p <- a2 * notbgivena2   # liar, not detected as such

Bayes' theorem

# Bayes' theorem for P(liar | flagged as a liar):
#   P(liar) * P(flagged | liar) /
#     [P(liar) * P(flagged | liar) + P(truthful) * P(flagged | truthful)]
# The evidence is "flagged as a liar", so the truthful term in the denominator
# must use the false-positive rate 1 - b, not the correct-detection rate b.
bt <- (a2 * b2) / (a2 * b2 + a * (1 - b))
bt

## [1] 0.5959596

print(bt * 100)

## [1] 59.59596

The probability that an individual is actually a liar, given that the polygraph flagged them as a liar, is about 60%.

Q2- B)

# Product of the two prior probabilities (not telling the truth, telling the
# truth).
# NOTE(review): the probability that a random individual is a liar is a2 on
# its own (0.2); confirm which quantity the question asks for.
result <- a * a2
print(result)

## [1] 0.16

The probability that a random individual is a liar is 16%.

# Product of the liar-detection rate (0.59) and its complement (0.41).
# NOTE(review): presumably meant to combine a prior with the detection rate;
# confirm against the question.
result2 <- 0.41 * 0.59
print(result2)

## [1] 0.2419

The probability that the polygraph accurately identifies the random individual as a liar is 24%.

Q3- A)

# Model as Poisson
# Identify the variables

# Number of trials, n.
na <- 9
# Probability of success. Stored as pi3a rather than `pi` so the built-in
# constant pi (3.14159...) is not overwritten for the rest of the session.
pi3a <- 0.5

# Poisson rate: lambda = n * probability of success.
lambda3a <- na * pi3a

# The standard deviation of a Poisson variable is the square root of its rate.
sd3a <- sqrt(lambda3a)
sd3a

## [1] 2.12132

# P(X = na) for a Poisson variable with rate lambda3a.
dpois(na, lambda3a)

## [1] 0.02316458

Q3- B)

# Model as binomial
n3b <- 8      # number of trials
pi3b <- 0.5   # probability of success on each trial

# P(X = 0): no successes in n3b trials. Arguments are named in full and reuse
# the variables above instead of repeating the literals.
result3b <- dbinom(0, size = n3b, prob = pi3b)
result3b

## [1] 0.00390625

# Standard deviation of a binomial: sqrt(n * p * (1 - p)). The square root was
# previously missing, so the reported value was the variance (2).
sd3b <- sqrt(n3b * pi3b * (1 - pi3b))
sd3b

## [1] 1.414214

# Expected value of a binomial: n * p.
ev <- n3b * pi3b
ev

## [1] 4

Q4- A)

# A wrong result has probability 0.75 and a right result has probability 0.25.
# Multiply two wrong results by one right result.
result4a <- local({
  p_right <- 0.25
  p_wrong <- 0.75
  p_wrong^2 * p_right
})
print(result4a)

## [1] 0.140625

Q4 - B)

# Binomial model.
# Target: P(3 <= x <= 4 | n = 5, pi = 0.25)

n4 <- 5           # number of trials
pi4 <- 0.25       # probability of success on each trial
x4 <- c(3L, 4L)   # outcomes of interest

# Add the point probabilities of the outcomes in x4.
result4b <- sum(dbinom(x = x4, size = n4, prob = pi4))
print(result4b)

## [1] 0.1025391

Q4- C)

# Cut-off between 2 and 3 successes; for a count, P(X <= 2.5) equals P(X <= 2).
x4c <- 2.5

# Upper tail as the complement of the lower tail.
result4c <- 1 - pbinom(q = x4c, size = n4, prob = pi4, lower.tail = TRUE)
print(result4c)

## [1] 0.1035156

# Cross-check: request the upper tail directly.
result4c2 <- pbinom(q = x4c, size = n4, prob = pi4, lower.tail = FALSE)
print(result4c2)

## [1] 0.1035156

Q5- A1)

# Parameters of the normal model.
mean5a <- 72.6
sd5a <- 4.78

# P(x < 80 | mean = 72.6, sd = 4.78): lower-tail area up to 80.
result5a1 <- pnorm(80, mean5a, sd5a, lower.tail = TRUE)
print(result5a1)

## [1] 0.939203

Q5- A2)

# P(68 < x < 78 | mean = 72.6, sd = 4.78)
# Evaluate the lower-tail area at both bounds in one call; diff() subtracts
# the area at 68 from the area at 78.
result5a2 <- diff(pnorm(c(68, 78), mean = 72.6, sd = 4.78))

print(result5a2)

## [1] 0.7027615

Yes. The mean speed of a car on Interstate 5 is 72.6 miles per hour, and the result for question 5-A2 shows that about 70% of cars travel between 68 and 78 miles per hour. Both bounds are close to the mean, so most cars are driving at speeds close to 72.6 miles per hour. This would be easier to see on a plot of the bell curve.

Q5- A3)

# P(x > 70 | mean = 72.6, sd = 4.78)
# Upper-tail area, computed as the complement of the lower-tail area at 70.
result5a3 <- 1 - pnorm(70, mean = mean5a, sd = sd5a)
print(result5a3)

## [1] 0.7067562

Q5- b1)

# Parameters of the normal model.
mean5ba <- 4313
sd5ba <- 583

# Value below which 5% of the distribution lies.
result5ba <- qnorm(0.05, mean = mean5ba, sd = sd5ba)
print(result5ba)

## [1] 3354.05

Q5- b2)

# Parameters of the normal model.
mean5b2 <- 5261
sd5b2 <- 807

# Value below which 90% of the distribution lies.
result5b2 <- qnorm(0.9, mean = mean5b2, sd = sd5b2)
print(result5b2)

## [1] 6295.212

Midterm1

Diego De Armas

2023-02-09

Q1