# Q1(a): upper-tail standard-normal probability for the z-score of 110
# measured against a centre of 100 and a scale of 16.
Q1Z <- (110 - 100) / 16
Q1ProbA <- 1 - pnorm(q = Q1Z)
print(Q1ProbA)
## [1] 0.2659855

# Q1(b): same tail probability, but the scale is now 16 / sqrt(12)
# (the standard error for a mean of 12 observations).
Q1se <- 16 / sqrt(12)
Q1Z2 <- (110 - 100) / Q1se
Q1ProbB <- 1 - pnorm(q = Q1Z2)
print(Q1ProbB)
## [1] 0.01519141
# Q2: one-sample z-test and 95% confidence interval for a proportion.
#n =331
#phat = .48
#p0 = .5

# Test statistic: the standard error is computed under the null value p0,
# which is the usual choice for a hypothesis test.
Q2Z <- (.48 - .5) / sqrt(.5 * (1 - .5) / 331)
# Upper-tail probability P(Z >= Q2Z).
# NOTE(review): phat < p0 here, so confirm that the upper tail matches the
# alternative hypothesis stated in the question.
Q2P <- 1 - pnorm(Q2Z)
Q2P
## [1] 0.7666125

# 95% confidence interval. The margin of error is built from the sample
# proportion phat rather than p0: a confidence interval estimates the
# unknown proportion and must not assume the null hypothesis is true.
Q2CZ <- qnorm(0.975)
Q2ME <- Q2CZ * sqrt(.48 * (1 - .48) / 331)
Q2LL <- .48 - Q2ME
Q2LL
## [1] 0.4261784
Q2UL <- .48 + Q2ME
Q2UL
## [1] 0.5338216
# Q3: two-sided two-sample t-test.
#Null hypothesis: (mu1 = mu2).
#Alternative hypothesis: (mu1 <> mu2).

# Test statistic: difference 4.9 - 6.1 divided by 1.8 * sqrt(2 / 22).
# NOTE(review): presumably 1.8 is a pooled SD and each group has 22
# observations (df = 22 + 22 - 2 = 42) -- confirm against the question.
Q3T <- (4.9 - 6.1) / (1.8 * sqrt(2 / 22))

# Two-sided p-value. pt(..., lower.tail = FALSE) returns the area above its
# argument, so it has to be evaluated at |t|. Q3T is negative; passing the
# signed value would double the large tail and give a "p-value" above 1.
Q3P <- 2 * pt(abs(Q3T), df = 42, lower.tail = FALSE)
Q3P
# Expected output: approximately 0.0325
# Q4: work backwards from a reported confidence interval to the sample
# mean, margin of error, standard error and sample standard deviation.
conf_int <- c(65, 77)
conf_level <- 0.9
Q4n <- 25

# The sample mean sits at the midpoint of the interval.
Q4SM <- sum(conf_int) / 2
print(Q4SM)
## [1] 71

# The margin of error is half the width of the interval.
Q4ME <- diff(conf_int) / 2
print(Q4ME)
## [1] 6

# Critical t value for the two-sided interval with Q4n - 1 degrees of
# freedom; dividing the margin of error by it gives the standard error.
Q4T <- qt(p = 1 - (1 - conf_level) / 2, df = Q4n - 1)
Q4SEM <- Q4ME / Q4T
print(Q4SEM)
## [1] 3.506963

# Standard error times sqrt(n) recovers the sample standard deviation.
Q4SSD <- Q4SEM * sqrt(Q4n)
print(Q4SSD)
## [1] 17.53481
# Q5: 2x2 table of outcome (rows) by study arm (columns). matrix() fills
# column by column, so the first two counts belong to "control" and the
# last two to "treatment".
mym <- matrix(
  c(4, 30, 24, 45),
  nrow = 2,
  dimnames = list(c("alive", "dead"), c("control", "treatment"))
)
print(mym)
## control treatment
## alive 4 24
## dead 30 45
##If we constructed a confidence interval despite this problem, the interval would be inaccurate and would not have the stated level of confidence; the interval would be wider than it should be. Additionally, the actual level of confidence would be lower than the intended level, meaning that the probability that the interval captures the true difference in survival rates would be lower than the intended level.
# Q6: read the passenger data set from a local CSV file and show its
# structure (column names, types and the first few values).
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this exact file exists.
Q6df <- read.csv(file = "C:/ChromeDownload/train (1).csv")
str(object = Q6df)
## 'data.frame': 891 obs. of 12 variables:
## $ PassengerId: int 1 2 3 4 5 6 7 8 9 10 ...
## $ Survived : int 0 1 1 1 0 0 0 0 1 1 ...
## $ Pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ Name : chr "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
## $ Sex : chr "male" "female" "female" "female" ...
## $ Age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ SibSp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ Parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ Ticket : chr "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
## $ Fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ Cabin : chr "" "C85" "" "C123" ...
## $ Embarked : chr "S" "C" "S" "S" ...
# Number of missing values in each column.
print(colSums(is.na(Q6df)))
## PassengerId Survived Pclass Name Sex Age
## 0 0 0 0 0 177
## SibSp Parch Ticket Fare Cabin Embarked
## 0 0 0 0 0 0

# Keep only the rows without any NA; all later analysis uses this copy.
Q6dfC <- na.omit(Q6df)

# Pearson correlation of the Survived indicator with Age.
CMA <- cor(Q6dfC[["Survived"]], Q6dfC[["Age"]])
print(CMA)
## [1] -0.07722109

# Pearson correlation of the Survived indicator with SibSp.
CMS <- cor(Q6dfC[["Survived"]], Q6dfC[["SibSp"]])
print(CMS)
## [1] -0.01735836

# Pearson correlation of the Survived indicator with PassengerId.
CMP <- cor(Q6dfC[["Survived"]], Q6dfC[["PassengerId"]])
print(CMP)
## [1] 0.02934016
# Attach the psych package.
# NOTE(review): no psych function is called in the visible code; the next
# line uses the base summary() generic. Confirm whether psych::describe()
# was intended or whether this library() call can be dropped.
library(psych)
# Per-column summary of the NA-free data: quantiles and mean for numeric
# columns, length/class/mode for character columns.
summary(Q6dfC)
## PassengerId Survived Pclass Name
## Min. : 1.0 Min. :0.0000 Min. :1.000 Length:714
## 1st Qu.:222.2 1st Qu.:0.0000 1st Qu.:1.000 Class :character
## Median :445.0 Median :0.0000 Median :2.000 Mode :character
## Mean :448.6 Mean :0.4062 Mean :2.237
## 3rd Qu.:677.8 3rd Qu.:1.0000 3rd Qu.:3.000
## Max. :891.0 Max. :1.0000 Max. :3.000
## Sex Age SibSp Parch
## Length:714 Min. : 0.42 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:20.12 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :28.00 Median :0.0000 Median :0.0000
## Mean :29.70 Mean :0.5126 Mean :0.4314
## 3rd Qu.:38.00 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :80.00 Max. :5.0000 Max. :6.0000
## Ticket Fare Cabin Embarked
## Length:714 Min. : 0.00 Length:714 Length:714
## Class :character 1st Qu.: 8.05 Class :character Class :character
## Mode :character Median : 15.74 Mode :character Mode :character
## Mean : 34.69
## 3rd Qu.: 33.38
## Max. :512.33
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
# Fix the RNG seed so the random 500-row sample, and every model fitted
# on it, can be reproduced.
set.seed(seed = 100)
train <- Q6dfC %>% sample_n(size = 500)

# Ordinary least-squares fit of the 0/1 Survived indicator on Age,
# followed by the default diagnostic plots and the coefficient table.
model1 <- lm(formula = Survived ~ Age, data = train)
plot(model1)
print(summary(model1))
##
## Call:
## lm(formula = Survived ~ Age, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4501 -0.3977 -0.3595 0.5979 0.7224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.452319 0.049479 9.142 <2e-16 ***
## Age -0.002183 0.001508 -1.448 0.148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4872 on 498 degrees of freedom
## Multiple R-squared: 0.004192, Adjusted R-squared: 0.002192
## F-statistic: 2.096 on 1 and 498 DF, p-value: 0.1483
# Logistic regression (binomial family) of Survived on Sex and Age, fitted
# on the sampled training rows, followed by the default diagnostic plots.
model2 <- glm(formula = Survived ~ Sex + Age, family = binomial, data = train)
plot(model2)

# Two hypothetical passengers: a 30-year-old male and a 25-year-old female.
ND <- data.frame(
  Sex = c("male", "female"),
  Age = c(30, 25)
)

# type = "link" is predict.glm's default, so these values are on the
# linear-predictor (log-odds) scale, not probabilities.
prediction <- predict(model2, newdata = ND, type = "link")
print(prediction)
## 1 2
## -1.428669 1.125678
library(caret)
## 载入需要的程辑包:lattice
##
## 载入程辑包:'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Recode the 0/1 outcome as a labelled factor so it can be compared with
# the predicted classes. The guard keeps the step safe to re-run: applying
# factor(levels = c(0, 1)) to an already-labelled factor would turn every
# value into NA.
if (!is.factor(train$Survived)) {
  train$Survived <- factor(
    train$Survived,
    levels = c(0, 1),
    labels = c("Not Survived", "Survived")
  )
}

# Fitted survival probabilities for the rows model2 was trained on
# (no newdata is supplied, so predict() returns the fitted values).
survival_estimates_all <- predict(model2, type = "response")

# Classify with a 0.5 cut-off, using the same level order as the reference.
survival_decisions <- ifelse(
  survival_estimates_all >= 0.5, "Survived", "Not Survived"
)
survival_decisions <- factor(
  survival_decisions,
  levels = c("Not Survived", "Survived")
)

# Cross-tabulate predicted against observed classes.
conf_matrix <- confusionMatrix(survival_decisions, train$Survived)
conf_matrix$table
## Reference
## Prediction Not Survived Survived
## Not Survived 263 63
## Survived 43 131

# Accuracy = correctly classified cases (the diagonal) over all cases.
# The condition is a single value, so a plain if/else is used instead of
# the vectorised ifelse(); an empty table yields NA.
accuracy <- if (sum(conf_matrix$table) == 0) {
  NA
} else {
  sum(diag(conf_matrix$table)) / sum(conf_matrix$table)
}
accuracy
## [1] 0.788