# P(IQ >= 110), where IQ ~ Normal(mean = 100, sd = 16)
result1 <- pnorm(q = 110,
                 mean = 100,
                 sd = 16,
                 lower.tail = FALSE)
result1
## [1] 0.2659855
# Second case: with the mean at 110, P(X >= 110) is 0.5 by symmetry
result1b <- pnorm(q = 110, mean = 110, sd = 16, lower.tail = FALSE)
result1b
## [1] 0.5
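# Equivalent check (sketch): keep pnorm()'s default lower tail and subtract from 1;
# this should reproduce result1 above (about 0.266).
1 - pnorm(q = 110, mean = 100, sd = 16)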
# Left-tailed test of H0: p = 0.50 against H1: p < 0.50, with phat = 0.48 and n = 331
phat2 <- 0.48
p2 <- 0.50
q2 <- 1 - p2
alpha2 <- 0.05
n2 <- 331
z2 <- (phat2-p2)/sqrt(p2*(1-p2)/n2)
z2
## [1] -0.7277362
pvalue2 <- pnorm(z2) # left-tailed p-value
pvalue2
## [1] 0.2333875
test_statistic2 <- z2
test_statistic2
## [1] -0.7277362
critical_value2 <- qnorm(p = alpha2, mean = 0, sd = 1)
critical_value2
## [1] -1.644854
# Standard error of the sample proportion, for the confidence interval
phat2b <- 1 - phat2
Se2 <- sqrt(phat2 * phat2b / n2)
Se2
## [1] 0.02746049
# 95% confidence interval (upper bound listed first, then the lower bound)
interval <- c(phat2 + 1.96 * Se2, phat2 - 1.96 * Se2)
interval
## [1] 0.5338226 0.4261774
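# Cross-check (sketch): since z2 = -0.73 does not fall below the critical value -1.64,
# H0 is not rejected at the 5% level. The same left-tailed test can be run with
# prop.test(), assuming the observed success count is roughly 0.48 * 331 (about 159);
# correct = FALSE turns off the continuity correction so it matches the z-test above.
prop.test(x = round(phat2 * n2), n = n2, p = p2,
          alternative = "less", correct = FALSE)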
# t statistic for a sample of n = 22 with mean 4.9 and sd 1.8, compared against a mean of 6.1
n3 <- 22
xbar1 <- 4.9
xbar2 <- 6.1
sd3 <- 1.8
t3 <- (xbar1 - xbar2)/(sd3/sqrt(n3))
t3
## [1] -3.126944
alpha3 <- 0.05
critical_value3 <- qnorm(1 - (alpha3/2)) # two-sided critical value
pvalue3 <- 2*pnorm(t3)                   # two-sided p-value (normal approximation)
pvalue3
## [1] 0.001766337
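# Sketch: because the statistic uses a sample sd with n3 = 22, an exact p-value would
# come from the t distribution with n3 - 1 degrees of freedom; pnorm() above is the
# normal approximation.
2*pt(t3, df = n3 - 1)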
# Recover the sample standard deviation from a 90% confidence interval of (65, 77) with n = 25
xbar1.4 <- 65
xbar2.4 <- 77
n4 <- 25
Sample_mean <- (xbar2.4 + xbar1.4)/2 # midpoint of the interval
Sample_mean
## [1] 71
MarginError <- (xbar2.4 - xbar1.4)/2 # half-width of the interval
MarginError
## [1] 6
df <- n4 - 1
p <- 0.9                  # confidence level
p24 <- p + (1 - p)/2      # upper-tail probability (0.95)
val4 <- qt(p24, df)       # t critical value
Se4 <- MarginError / val4 # standard error of the mean
Sd4 <- Se4 * sqrt(n4)     # sample standard deviation
Sd4
## [1] 17.53481
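# Quick check: rebuilding the 90% interval from the recovered sd should return the
# original endpoints, 65 and 77.
Sample_mean + c(-1, 1) * qt(p24, df) * Sd4/sqrt(n4)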
# 2 x 2 contingency table of outcome (alive/dead) by group (control/treatment)
mym <- matrix(c(4,30,24,45), nrow=2)
colnames(mym) <- c("control", "treatment")
rownames(mym) <- c("alive","dead")
mym
## control treatment
## alive 4 24
## dead 30 45
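# Sketch: with the 2 x 2 table in hand, a chi-squared test compares survival across
# the two groups; fisher.test() is the usual alternative when a cell count is small
# (e.g. the 4 in the control/alive cell).
chisq.test(mym)
fisher.test(mym)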
install.packages("psych", repos = "https://cloud.r-project.org")
library(psych)
install.packages("readr", repos = "https://cloud.r-project.org")
library(readr)
install.packages("dplyr", repos = "https://cloud.r-project.org")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
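# Optional sketch: installing on every run is unnecessary; a guarded install only
# downloads a package when it is missing.
for (pkg in c("psych", "readr", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}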
# Load the Titanic training data
mydata <- read_csv("train.csv")
## Rows: 891 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Name, Sex, Ticket, Cabin, Embarked
## dbl (7): PassengerId, Survived, Pclass, Age, SibSp, Parch, Fare
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mydata
## # A tibble: 891 × 12
## PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin
## <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 1 0 3 Braun… male 22 1 0 A/5 2… 7.25 <NA>
## 2 2 1 1 Cumin… fema… 38 1 0 PC 17… 71.3 C85
## 3 3 1 3 Heikk… fema… 26 0 0 STON/… 7.92 <NA>
## 4 4 1 1 Futre… fema… 35 1 0 113803 53.1 C123
## 5 5 0 3 Allen… male 35 0 0 373450 8.05 <NA>
## 6 6 0 3 Moran… male NA 0 0 330877 8.46 <NA>
## 7 7 0 1 McCar… male 54 0 0 17463 51.9 E46
## 8 8 0 3 Palss… male 2 3 1 349909 21.1 <NA>
## 9 9 1 3 Johns… fema… 27 0 2 347742 11.1 <NA>
## 10 10 1 2 Nasse… fema… 14 1 0 237736 30.1 <NA>
## # … with 881 more rows, and 1 more variable: Embarked <chr>
# Summary statistics for every column (psych::describe)
describe(mydata)
## vars n mean sd median trimmed mad min max range
## PassengerId 1 891 446.00 257.35 446.00 446.00 330.62 1.00 891.00 890.00
## Survived 2 891 0.38 0.49 0.00 0.35 0.00 0.00 1.00 1.00
## Pclass 3 891 2.31 0.84 3.00 2.39 0.00 1.00 3.00 2.00
## Name* 4 891 446.00 257.35 446.00 446.00 330.62 1.00 891.00 890.00
## Sex* 5 891 1.65 0.48 2.00 1.68 0.00 1.00 2.00 1.00
## Age 6 714 29.70 14.53 28.00 29.27 13.34 0.42 80.00 79.58
## SibSp 7 891 0.52 1.10 0.00 0.27 0.00 0.00 8.00 8.00
## Parch 8 891 0.38 0.81 0.00 0.18 0.00 0.00 6.00 6.00
## Ticket* 9 891 339.52 200.83 338.00 339.65 268.35 1.00 681.00 680.00
## Fare 10 891 32.20 49.69 14.45 21.38 10.24 0.00 512.33 512.33
## Cabin* 11 204 77.00 42.23 76.00 77.09 54.11 1.00 147.00 146.00
## Embarked* 12 889 2.54 0.79 3.00 2.67 0.00 1.00 3.00 2.00
## skew kurtosis se
## PassengerId 0.00 -1.20 8.62
## Survived 0.48 -1.77 0.02
## Pclass -0.63 -1.28 0.03
## Name* 0.00 -1.20 8.62
## Sex* -0.62 -1.62 0.02
## Age 0.39 0.16 0.54
## SibSp 3.68 17.73 0.04
## Parch 2.74 9.69 0.03
## Ticket* 0.00 -1.28 6.73
## Fare 4.77 33.12 1.66
## Cabin* 0.00 -1.19 2.96
## Embarked* -1.26 -0.23 0.03
# Count missing values in each column
colSums(is.na(mydata))
## PassengerId Survived Pclass Name Sex Age
## 0 0 0 0 0 177
## SibSp Parch Ticket Fare Cabin Embarked
## 0 0 0 0 687 2
# Impute missing Age values with the median and recode Sex as 1 = male, 0 = female
mydata$Age[is.na(mydata$Age)] <- median(mydata$Age, na.rm=TRUE)
mydata$Sex <- if_else(mydata$Sex == "male", 1, 0)

# Pairwise correlations with Survived
cor(mydata$Survived, mydata$Age)
## [1] -0.06491042
cor(mydata$Survived, mydata$Sex)
## [1] -0.5433514
cor(mydata$Survived, mydata$Pclass)
## [1] -0.338481
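# Sketch: the three pairwise correlations above can also be read off a single
# correlation matrix of the numeric columns of interest.
cor(mydata[, c("Survived", "Age", "Sex", "Pclass")])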
# Draw a random training sample of 500 passengers and fit a linear probability model
# of Survived on Pclass, Age, and Sex
set.seed(100)
train <- sample_n(mydata, 500)
reg1 <- lm(train$Survived ~ train$Pclass + train$Age + train$Sex)
summary(reg1)
##
## Call:
## lm(formula = train$Survived ~ train$Pclass + train$Age + train$Sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0791 -0.2568 -0.1164 0.2534 0.9845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.267864 0.082326 15.401 < 2e-16 ***
## train$Pclass -0.176925 0.023193 -7.628 1.23e-13 ***
## train$Age -0.005937 0.001480 -4.012 6.94e-05 ***
## train$Sex -0.454444 0.039011 -11.649 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4007 on 496 degrees of freedom
## Multiple R-squared: 0.3227, Adjusted R-squared: 0.3186
## F-statistic: 78.76 on 3 and 496 DF, p-value: < 2.2e-16
plot(reg1) # regression diagnostic plots
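# Sketch: because Survived is binary, a logistic regression is the more conventional
# model; lm() above is a linear probability model, which is why some fitted values
# below fall outside [0, 1]. This alternative fit is illustrative, not part of the
# original analysis.
reg_logit <- glm(Survived ~ Pclass + Age + Sex, data = train, family = binomial)
summary(reg_logit)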
# Fitted survival probabilities from the linear model
dataprediction <- predict(reg1)
# Preview the first few fitted values (the full vector of 500 is omitted for brevity)
head(dataprediction)
## 1 2 3 4 5 6
## 0.110469463 0.570850700 0.688404300 0.157966376 0.753712554 0.732637057
# Attach the fitted values to the training data
train$predictions <- dataprediction
train
## # A tibble: 500 × 13
## PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 714 0 3 "Lars… 1 29 0 0 7545 9.48 <NA>
## 2 503 0 3 "O'Su… 0 28 0 0 330909 7.63 <NA>
## 3 358 0 2 "Funk… 0 38 0 0 237671 13 <NA>
## 4 624 0 3 "Hans… 1 21 0 0 350029 7.85 <NA>
## 5 718 1 2 "Trou… 0 27 0 0 34218 10.5 E101
## 6 470 1 3 "Bacl… 0 0.75 2 1 2666 19.3 <NA>
## 7 516 0 1 "Walk… 1 47 0 0 36967 34.0 D46
## 8 823 0 1 "Reuc… 1 38 0 0 19972 0 <NA>
## 9 838 0 3 "Siro… 1 28 0 0 392092 8.05 <NA>
## 10 98 1 1 "Gree… 1 23 0 1 PC 17… 63.4 D10 …
## # … with 490 more rows, and 2 more variables: Embarked <chr>, predictions <dbl>
# Convert fitted probabilities to 0/1 predictions with a 0.5 cutoff
train$predictions <- if_else(train$predictions > 0.5, 1, 0)
train
## # A tibble: 500 × 13
## PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 714 0 3 "Lars… 1 29 0 0 7545 9.48 <NA>
## 2 503 0 3 "O'Su… 0 28 0 0 330909 7.63 <NA>
## 3 358 0 2 "Funk… 0 38 0 0 237671 13 <NA>
## 4 624 0 3 "Hans… 1 21 0 0 350029 7.85 <NA>
## 5 718 1 2 "Trou… 0 27 0 0 34218 10.5 E101
## 6 470 1 3 "Bacl… 0 0.75 2 1 2666 19.3 <NA>
## 7 516 0 1 "Walk… 1 47 0 0 36967 34.0 D46
## 8 823 0 1 "Reuc… 1 38 0 0 19972 0 <NA>
## 9 838 0 3 "Siro… 1 28 0 0 392092 8.05 <NA>
## 10 98 1 1 "Gree… 1 23 0 1 PC 17… 63.4 D10 …
## # … with 490 more rows, and 2 more variables: Embarked <chr>, predictions <dbl>
# Observed outcomes
observed <- train$Survived
# Cross-tabulate observed vs. predicted outcomes (confusion table)
outcome.table <- table(observed, train$predictions)
outcome.table
##         
## observed   0   1
##        0 268  43
##        1  71 118
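# Accuracy from the confusion table: correct predictions sit on the diagonal,
# (268 + 118) / 500 = 0.772 on this training sample.
sum(diag(outcome.table)) / sum(outcome.table)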