Question 1 A)

P(IQ >= 110) Mean = 100 SD = 16

# P(IQ >= 110) for IQ ~ Normal(mean = 100, sd = 16).
# lower.tail = FALSE makes pnorm() return the upper-tail area directly.
result1 <- pnorm(110, mean = 100, sd = 16, lower.tail = FALSE)
print(result1)
## [1] 0.2659855

The probability that a randomly selected person has an IQ of 110 or higher is about 26.6%.

Question 1 B)

P(IQ>= 110) mean = 110, sd = 16

# P(IQ >= 110) when the distribution is Normal(mean = 110, sd = 16).
# The cutoff equals the mean, so the upper-tail area is one half.
result1b <- pnorm(110, mean = 110, sd = 16, lower.tail = FALSE)

print(result1b)
## [1] 0.5

Question 2 A)

Null and alternative hypotheses. H0: p = 50% of adults could not afford to go to college.

Ha: p < 50% of adults could not afford to go to college.

Significance level of 0.05

# One-sample z-test for a proportion: inputs and test statistic.
n2 <- 331       # sample size
alpha2 <- 0.05  # significance level

phat2 <- 0.48   # observed sample proportion
p2 <- 0.50      # proportion under the null hypothesis
q2 <- 1 - p2    # complement of the null proportion

# The standard error is computed from the null proportion (p2 * q2 / n2).
z2 <- (phat2 - p2) / sqrt(p2 * q2 / n2)
print(z2)
## [1] -0.7277362

PVALUE

# Lower-tail p-value: standard normal area to the left of z2.
pvalue2 <- pnorm(z2, mean = 0, sd = 1, lower.tail = TRUE)
print(pvalue2)
## [1] 0.2333875

TEST statistic

# The test statistic is the z value computed above, kept under its own name.
test_statistic2 <- z2
print(test_statistic2)
## [1] -0.7277362

Critical value

# Lower-tail critical value: the standard normal quantile at probability alpha2
# (qnorm() defaults to mean = 0 and sd = 1).
critical_value2 <- qnorm(alpha2)
print(critical_value2)
## [1] -1.644854

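The test statistic (-0.73) is smaller in absolute value than the critical value (-1.64), so it does not fall in the rejection region; equivalently, the p-value (0.233) is greater than 0.05. We therefore cannot reject the null hypothesis.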

Question 2 B)

# 95% confidence interval for the population proportion.
phat2b <- 1 - phat2  # complement of the sample proportion

# The standard error here is built from the sample proportion (phat2),
# not from the null value used for the z-test.
Se2 <- sqrt(phat2 * phat2b / n2)
Se2
## [1] 0.02746049
# Store the bounds in the conventional (lower, upper) order.
# 1.96 is the two-sided 95% standard normal critical value.
interval <- c(phat2 - 1.96 * Se2, phat2 + 1.96 * Se2)
interval
## [1] 0.4261774 0.5338226

Our 95% confidence interval runs from 42.62% to 53.38%, with a standard error of about 2.7%. Because 0.5 lies inside this interval, the hypothesized value of 50% is included.

Question 3 A)

H0: Mu1 = Mu2. Ha: Mu1 is not equal to Mu2.

Significance level: 0.05

# Two-sided test of H0: Mu1 = Mu2.
n3 <- 22      # sample size
xbar1 <- 4.9  # first mean
xbar2 <- 6.1  # second mean
sd3 <- 1.8    # standard deviation used in the standard error
# Test statistic: difference in means divided by sd3 / sqrt(n3).
t3 <- (xbar1 - xbar2) / (sd3 / sqrt(n3))
t3
## [1] -3.126944
alpha3 <- 0.05
# Despite its name, Se3 holds the two-sided standard normal critical value
# for alpha3 (about 1.96), not a standard error. It is not used below.
Se3 <- qnorm(1 - (alpha3 / 2))
# Two-sided p-value from the standard normal distribution. Using -abs(t3)
# keeps the result valid (never above 1) whatever the sign of the statistic;
# for the negative statistic here it equals 2 * pnorm(t3).
# NOTE(review): if sd3 is a sample standard deviation, a t reference
# distribution (pt() with n3 - 1 degrees of freedom) may be more appropriate
# for n3 = 22 -- confirm against the problem statement.
pvalue3 <- 2 * pnorm(-abs(t3))
pvalue3
## [1] 0.001766337

Since the p-value is lower than alpha, we can reject the null hypothesis.

Question 4

# Inputs for Question 4. The two values are presumably the lower and upper
# limits of a confidence interval -- confirm against the problem statement.
n4 <- 25
xbar1.4 <- 65
xbar2.4 <- 77
# Midpoint of the two values.
Sample_mean <- mean(c(xbar1.4, xbar2.4))
Sample_mean
## [1] 71
# Half of the distance between the two values.
MarginError <- (xbar2.4 - xbar1.4) / 2
print(MarginError)
## [1] 6

Calculating Sd

# Back out the standard deviation from the margin of error.
df <- n4 - 1            # degrees of freedom for the t distribution
p <- 0.9                # confidence level
p24 <- p + (1 - p) / 2  # cumulative probability at the upper critical value

# t critical value, then margin = t * SE rearranged to SE = margin / t.
val4 <- qt(p = p24, df = df)
Se4 <- MarginError / val4

# SE = SD / sqrt(n) rearranged to SD = SE * sqrt(n).
Sd4 <- sqrt(n4) * Se4
print(Sd4)
## [1] 17.53481

Question 5

# 2 x 2 table of counts: rows are outcomes, columns are study groups.
# matrix() fills column by column, so control = (4, 30), treatment = (24, 45).
mym <- matrix(
  c(4, 30, 24, 45),
  nrow = 2,
  dimnames = list(c("alive", "dead"), c("control", "treatment"))
)
print(mym)
##       control treatment
## alive       4        24
## dead       30        45

We cannot construct the interval using the normal approximation because the number of successes (alive) in each group must be at least 5, and the control group has only 4. To construct a confidence interval this way, we must have approximate normality. If we constructed the confidence interval anyway, we would see that the data show skewness.

Question 6

install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpcRds6T/downloaded_packages
library(psych)
install.packages("readr", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpcRds6T/downloaded_packages
library(readr)
install.packages("dplyr", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//RtmpcRds6T/downloaded_packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the data from train.csv in the working directory as a tibble.
mydata <- readr::read_csv(file = "train.csv")
## Rows: 891 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Name, Sex, Ticket, Cabin, Embarked
## dbl (7): PassengerId, Survived, Pclass, Age, SibSp, Parch, Fare
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows and the column types of the loaded data.
print(mydata)
## # A tibble: 891 × 12
##    PassengerId Survived Pclass Name   Sex     Age SibSp Parch Ticket  Fare Cabin
##          <dbl>    <dbl>  <dbl> <chr>  <chr> <dbl> <dbl> <dbl> <chr>  <dbl> <chr>
##  1           1        0      3 Braun… male     22     1     0 A/5 2…  7.25 <NA> 
##  2           2        1      1 Cumin… fema…    38     1     0 PC 17… 71.3  C85  
##  3           3        1      3 Heikk… fema…    26     0     0 STON/…  7.92 <NA> 
##  4           4        1      1 Futre… fema…    35     1     0 113803 53.1  C123 
##  5           5        0      3 Allen… male     35     0     0 373450  8.05 <NA> 
##  6           6        0      3 Moran… male     NA     0     0 330877  8.46 <NA> 
##  7           7        0      1 McCar… male     54     0     0 17463  51.9  E46  
##  8           8        0      3 Palss… male      2     3     1 349909 21.1  <NA> 
##  9           9        1      3 Johns… fema…    27     0     2 347742 11.1  <NA> 
## 10          10        1      2 Nasse… fema…    14     1     0 237736 30.1  <NA> 
## # … with 881 more rows, and 1 more variable: Embarked <chr>
# Descriptive statistics for every column, from the psych package.
psych::describe(mydata)
##             vars   n   mean     sd median trimmed    mad  min    max  range
## PassengerId    1 891 446.00 257.35 446.00  446.00 330.62 1.00 891.00 890.00
## Survived       2 891   0.38   0.49   0.00    0.35   0.00 0.00   1.00   1.00
## Pclass         3 891   2.31   0.84   3.00    2.39   0.00 1.00   3.00   2.00
## Name*          4 891 446.00 257.35 446.00  446.00 330.62 1.00 891.00 890.00
## Sex*           5 891   1.65   0.48   2.00    1.68   0.00 1.00   2.00   1.00
## Age            6 714  29.70  14.53  28.00   29.27  13.34 0.42  80.00  79.58
## SibSp          7 891   0.52   1.10   0.00    0.27   0.00 0.00   8.00   8.00
## Parch          8 891   0.38   0.81   0.00    0.18   0.00 0.00   6.00   6.00
## Ticket*        9 891 339.52 200.83 338.00  339.65 268.35 1.00 681.00 680.00
## Fare          10 891  32.20  49.69  14.45   21.38  10.24 0.00 512.33 512.33
## Cabin*        11 204  77.00  42.23  76.00   77.09  54.11 1.00 147.00 146.00
## Embarked*     12 889   2.54   0.79   3.00    2.67   0.00 1.00   3.00   2.00
##              skew kurtosis   se
## PassengerId  0.00    -1.20 8.62
## Survived     0.48    -1.77 0.02
## Pclass      -0.63    -1.28 0.03
## Name*        0.00    -1.20 8.62
## Sex*        -0.62    -1.62 0.02
## Age          0.39     0.16 0.54
## SibSp        3.68    17.73 0.04
## Parch        2.74     9.69 0.03
## Ticket*      0.00    -1.28 6.73
## Fare         4.77    33.12 1.66
## Cabin*       0.00    -1.19 2.96
## Embarked*   -1.26    -0.23 0.03

Filtering the data

# Count the missing values in each column before cleaning.
colSums(is.na(mydata))
## PassengerId    Survived      Pclass        Name         Sex         Age 
##           0           0           0           0           0         177 
##       SibSp       Parch      Ticket        Fare       Cabin    Embarked 
##           0           0           0           0         687           2
# Fill missing ages with the median of the observed ages.
mydata$Age <- replace(
  mydata$Age,
  is.na(mydata$Age),
  median(mydata$Age, na.rm = TRUE)
)

# Recode Sex as a numeric indicator: 1 for "male", 0 otherwise.
mydata$Sex <- as.numeric(mydata$Sex == "male")
# Correlation of Survived with each candidate predictor.
with(mydata, cor(Survived, Age))
## [1] -0.06491042
with(mydata, cor(Survived, Sex))
## [1] -0.5433514
with(mydata, cor(Survived, Pclass))
## [1] -0.338481
# Fix the random seed so the 500-row training sample is reproducible.
set.seed(100)
train <- sample_n(mydata, 500)
# Fit a linear model of Survived on Pclass, Age and Sex.
# The formula uses bare column names with data = train rather than train$...
# terms, so the model is tied to the data frame and predict(reg1, newdata = ...)
# can be used on other rows. The fitted coefficients are unchanged.
# NOTE(review): Survived is a 0/1 outcome, so some fitted values fall outside
# [0, 1]; a logistic model (glm with family = binomial) may be a better fit
# for this question -- confirm against the assignment.
reg1 <- lm(Survived ~ Pclass + Age + Sex, data = train)
summary(reg1)
## 
## Call:
## lm(formula = Survived ~ Pclass + Age + Sex, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0791 -0.2568 -0.1164  0.2534  0.9845 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.267864   0.082326  15.401  < 2e-16 ***
## Pclass      -0.176925   0.023193  -7.628 1.23e-13 ***
## Age         -0.005937   0.001480  -4.012 6.94e-05 ***
## Sex         -0.454444   0.039011 -11.649  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4007 on 496 degrees of freedom
## Multiple R-squared:  0.3227, Adjusted R-squared:  0.3186 
## F-statistic: 78.76 on 3 and 496 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots for the fitted model.
plot(reg1)

# Called without newdata, predict() returns the fitted values for the rows
# the model was trained on.
dataprediction <- predict(object = reg1)
print(dataprediction)
##            1            2            3            4            5            6 
##  0.110469463  0.570850700  0.688404300  0.157966376  0.753712554  0.732637057 
##            7            8            9           10           11           12 
##  0.357450892  0.410884919  0.116406577  0.499941629  0.315891094  0.229211744 
##           13           14           15           16           17           18 
##  0.470256059  0.517416673  0.057035437  0.924700181  0.110469463  0.346765344 
##           19           20           21           22           23           24 
##  0.570850700  0.912825953  0.339639550  0.783398124  0.169840604  0.128280805 
##           25           26           27           28           29           30 
##  0.523353787  0.152029262  0.447696283  0.676530072  0.924700181  0.606473384 
##           31           32           33           34           35           36 
##  0.116406577  0.966259979  0.327765322  0.428696261  0.570850700  0.924700181 
##           37           38           39           40           41           42 
##  0.104532349  0.293331318  0.187651946  0.104532349 -0.020147045  0.217337516 
##           43           44           45           46           47           48 
##  0.116406577  0.671781638  0.134217920  0.175777718  0.104532349  0.143123591 
##           49           50           51           52           53           54 
##  0.612410498  0.274331296  0.700278528  0.630221840  0.707404322  0.116406577 
##           55           56           57           58           59           60 
##  0.292142638  0.469919761  0.345576664  0.276708656  0.677718752  0.488067401 
##           61           62           63           64           65           66 
##  0.092658121  0.104532349  0.116406577  0.256519954  0.387136462  0.746586760 
##           67           68           69           70           71           72 
##  0.334891116  0.587473362  0.257708634  0.725215664  0.458045533  0.268394182 
##           73           74           75           76           77           78 
##  0.333702436  0.116406577 -0.103266642  0.152029262  0.630221840  0.713341436 
##           79           80           81           82           83           84 
##  1.079065146  0.116406577  0.269582862  0.229211744  0.364576686  0.152029262 
##           85           86           87           88           89           90 
##  0.978134207  0.138966353  0.859391927  0.169840604  0.470256059  0.936574409 
##           91           92           93           94           95           96 
##  0.116406577  0.912825953  0.109280783  0.570850700  0.493668217  0.287394204 
##           97           98           99          100          101          102 
##  0.116406577  0.299268432  0.918763067  0.547102243  0.700278528  0.110469463 
##          103          104          105          106          107          108 
##  0.422759147  0.441759169  0.122343691  0.357450892  0.116406577  0.080783893 
##          109          110          111          112          113          114 
##  0.187651946  0.470256059  0.470256059  0.434633375  0.258897314  0.116406577 
##          115          116          117          118          119          120 
##  0.116406577  0.146092148  0.422759147  0.116406577  0.588662042  0.157966376 
##          121          122          123          124          125          126 
##  0.570850700  0.747775440  0.257708634  0.092658121  0.214960156  0.582724928 
##          127          128          129          130          131          132 
##  0.074846779  0.895014611  0.257708634  0.116406577  0.187651946  0.269582862 
##          133          134          135          136          137          138 
##  0.339639550  0.535564313  0.128280805  0.134217920  0.110469463  0.304016866 
##          139          140          141          142          143          144 
##  0.954385751  0.960322865  0.116406577  0.630221840  0.399010691  0.229211744 
##          145          146          147          148          149          150 
##  0.948448637  0.924700181  0.116406577  0.160934933  0.015475639  0.116406577 
##          151          152          153          154          155          156 
##  0.116406577  0.618347612  0.152029262  0.556007914  0.250582840  0.315891094 
##          157          158          159          160          161          162 
##  0.446507603  0.746586760  0.138966353  0.594599156  0.163903490  0.163903490 
##          163          164          165          166          167          168 
##  0.098595235  0.116406577  0.169840604  0.116406577  0.653970296  0.716901190 
##          169          170          171          172          173          174 
##  1.001882663  0.140155034  0.859391927  0.758460988  0.128280805  0.116406577 
##          175          176          177          178          179          180 
##  0.718089870  0.800020786  0.116406577  0.175777718  0.116406577  0.990008435 
##          181          182          183          184          185          186 
##  0.116406577  0.464318945  0.352702458  0.247023086  0.835643471  0.157966376 
##          187          188          189          190          191          192 
##  0.718089870  0.323016888  0.593410476  0.157966376  0.116406577  0.847517699 
##          193          194          195          196          197          198 
##  0.116406577  0.470256059  0.116406577  0.311142660  0.276708656  0.175777718 
##          199          200          201          202          203          204 
##  0.588662042  0.134217920  0.293331318  0.116406577  0.157966376  0.116406577 
##          205          206          207          208          209          210 
##  0.092658121  0.529627199  0.453633397  0.606473384  0.924700181  0.422759147 
##          211          212          213          214          215          216 
##  0.612410498  0.116406577  0.116406577  0.042192652  0.092658121  0.140155034 
##          217          218          219          220          221          222 
##  0.199526174  0.470256059  0.157966376  0.366356563  0.428696261  0.280152182 
##          223          224          225          226          227          228 
##  0.642096068  0.636158954  0.116406577  0.753712554  0.747775440  0.352702458 
##          229          230          231          232          233          234 
##  0.281457090  0.163903490  0.116406577  0.452444717  0.146092148  0.553039357 
##          235          236          237          238          239          240 
##  0.098595235  0.134217920  0.966259979  0.092658121  0.163903490  0.470256059 
##          241          242          243          244          245          246 
##  0.494004515  0.137186477  0.978134207  0.570850700  0.683655866  0.357450892 
##          247          248          249          250          251          252 
##  0.553039357  0.077815336 -0.079518185  0.890266177  0.134217920  0.021412753 
##          253          254          255          256          257          258 
##  0.476193173  0.351513778  0.116406577  0.157966376  0.505878743  0.198337494 
##          259          260          261          262          263          264 
##  0.086721007  0.128280805  0.187651946  0.074846779  0.104532349  0.116406577 
##          265          266          267          268          269          270 
##  0.304016866  0.523690085  0.116406577  0.612410498  0.494004515  0.990008435 
##          271          272          273          274          275          276 
##  0.003601411  0.570850700  0.210211722  0.258897314  0.051098323  0.725215664 
##          277          278          279          280          281          282 
##  0.116406577  0.648033182  0.813083695  0.865329041  0.293331318  0.122343691 
##          283          284          285          286          287          288 
##  0.564913586  0.134217920  0.146092148  0.629033160  0.116406577  0.570850700 
##          289          290          291          292          293          294 
##  0.570850700  0.210211722  0.369325120  0.181714832  0.101563792  0.387136462 
##          295          296          297          298          299          300 
##  0.104532349  0.251771520  0.163903490  0.116406577  0.051098323  0.805957900 
##          301          302          303          304          305          306 
##  0.782209444  0.454642706  0.747775440  0.948448637  0.511479559  0.280268410 
##          307          308          309          310          311          312 
##  0.570850700  0.122343691  0.157966376  0.110469463  0.264834428  0.116406577 
##          313          314          315          316          317          318 
##  0.470256059  0.399010691  0.292142638  0.134217920  0.264834428  0.116406577 
##          319          320          321          322          323          324 
##  0.454642706  0.664655844  0.469919761  0.152029262  0.163903490  0.670592958 
##          325          326          327          328          329          330 
##  0.600536270  0.163903490  0.169840604  0.995945549  0.452444717  0.369325120 
##          331          332          333          334          335          336 
##  0.116406577  0.416822033 -0.135920769  0.877203269  0.771523896  0.323016888 
##          337          338          339          340          341          342 
##  0.235148858  0.281457090  0.890266177  0.110469463  0.777461010  0.883140383 
##          343          344          345          346          347          348 
##  0.116406577  0.180526152  0.021412753  0.140155034  0.116406577  0.257708634 
##          349          350          351          352          353          354 
##  0.293331318  0.116406577  0.116406577  0.116406577  0.121155011  0.713341436 
##          355          356          357          358          359          360 
##  0.493668217  0.575599134  0.162714810  0.311142660  0.134217920  0.134217920 
##          361          362          363          364          365          366 
##  0.116406577  0.576787814  0.116406577  0.948448637  0.015475639  0.570850700 
##          367          368          369          370          371          372 
##  0.116406577  0.470256059  0.564913586  0.339639550  0.116406577  0.245834406 
##          373          374          375          376          377          378 
##  0.163903490  0.646844502  0.116406577  0.865329041  0.464318945  0.707404322 
##          379          380          381          382          383          384 
##  0.021412753  0.570850700  0.116406577  0.068909665  0.346765344  0.570850700 
##          385          386          387          388          389          390 
##  0.039224095  0.570850700  0.187651946  0.250582840  0.470256059  0.116406577 
##          391          392          393          394          395          396 
##  0.140155034  0.152029262  0.617158932  0.731152778  0.570850700  0.187651946 
##          397          398          399          400          401          402 
##  0.256519954  0.092658121  0.428696261  0.080783893  0.877203269  0.416822033 
##          403          404          405          406          407          408 
##  0.067720985  0.086721007  0.476193173  0.293331318 -0.014209931  0.712152756 
##          409          410          411          412          413          414 
##  0.570850700  0.630221840  0.630221840  0.317079774  0.116406577  0.771523896 
##          415          416          417          418          419          420 
##  0.813083695  0.712152756  0.648033182  0.152029262  0.570850700  0.706215642 
##          421          422          423          424          425          426 
##  0.470256059  0.116406577  0.807146581  0.505542445  0.606473384  0.453633397 
##          427          428          429          430          431          432 
##  0.747775440  0.652781616  0.128280805  0.823769243  0.470256059  0.128280805 
##          433          434          435          436          437          438 
##  0.836832151  0.801209466  0.352702458  0.163903490  0.140155034  0.128280805 
##          439          440          441          442          443          444 
##  0.570850700  0.299268432  0.128280805  0.116406577  0.642096068  0.214960156 
##          445          446          447          448          449          450 
##  0.033286981  0.884329063  0.553039357  0.859391927  0.651001739  0.351513778 
##          451          452          453          454          455          456 
##  0.045161209  0.476193173  0.175777718  0.116406577  0.181714832  0.713341436 
##          457          458          459          460          461          462 
##  0.269582862  0.116406577  0.412073599  0.258897314  0.323016888  0.169840604 
##          463          464          465          466          467          468 
##  0.369325120  0.689592980  0.327765322  0.311142660  0.110469463  0.150840582 
##          469          470          471          472          473          474 
##  0.741838326 -0.008272817 -0.020147045  0.906888839  0.404947805  0.116406577 
##          475          476          477          478          479          480 
##  0.859391927  0.116406577  0.721058427  0.924700181  0.157966376  0.829706357 
##          481          482          483          484          485          486 
##  0.624284726  0.245834406  0.924700181  0.181714832  0.281457090  0.725215664 
##          487          488          489          490          491          492 
##  0.309953980  0.116406577  0.759649668  0.488067401  0.399010691  0.883140383 
##          493          494          495          496          497          498 
##  0.086721007  0.794083672  0.346765344  0.152029262  0.104532349  0.039224095 
##          499          500 
##  0.138966353  0.116406577
# Store the fitted values in a new predictions column of the training data.
train[["predictions"]] <- c(dataprediction)
print(train)
## # A tibble: 500 × 13
##    PassengerId Survived Pclass Name     Sex   Age SibSp Parch Ticket  Fare Cabin
##          <dbl>    <dbl>  <dbl> <chr>  <dbl> <dbl> <dbl> <dbl> <chr>  <dbl> <chr>
##  1         714        0      3 "Lars…     1 29        0     0 7545    9.48 <NA> 
##  2         503        0      3 "O'Su…     0 28        0     0 330909  7.63 <NA> 
##  3         358        0      2 "Funk…     0 38        0     0 237671 13    <NA> 
##  4         624        0      3 "Hans…     1 21        0     0 350029  7.85 <NA> 
##  5         718        1      2 "Trou…     0 27        0     0 34218  10.5  E101 
##  6         470        1      3 "Bacl…     0  0.75     2     1 2666   19.3  <NA> 
##  7         516        0      1 "Walk…     1 47        0     0 36967  34.0  D46  
##  8         823        0      1 "Reuc…     1 38        0     0 19972   0    <NA> 
##  9         838        0      3 "Siro…     1 28        0     0 392092  8.05 <NA> 
## 10          98        1      1 "Gree…     1 23        0     1 PC 17… 63.4  D10 …
## # … with 490 more rows, and 2 more variables: Embarked <chr>, predictions <dbl>
# Turn the fitted values into class labels: 1 when above 0.5, otherwise 0.
# This overwrites the numeric fitted values stored in the column.
train[["predictions"]] <- if_else(
  train[["predictions"]] > 0.5,
  true = 1,
  false = 0
)
print(train)
## # A tibble: 500 × 13
##    PassengerId Survived Pclass Name     Sex   Age SibSp Parch Ticket  Fare Cabin
##          <dbl>    <dbl>  <dbl> <chr>  <dbl> <dbl> <dbl> <dbl> <chr>  <dbl> <chr>
##  1         714        0      3 "Lars…     1 29        0     0 7545    9.48 <NA> 
##  2         503        0      3 "O'Su…     0 28        0     0 330909  7.63 <NA> 
##  3         358        0      2 "Funk…     0 38        0     0 237671 13    <NA> 
##  4         624        0      3 "Hans…     1 21        0     0 350029  7.85 <NA> 
##  5         718        1      2 "Trou…     0 27        0     0 34218  10.5  E101 
##  6         470        1      3 "Bacl…     0  0.75     2     1 2666   19.3  <NA> 
##  7         516        0      1 "Walk…     1 47        0     0 36967  34.0  D46  
##  8         823        0      1 "Reuc…     1 38        0     0 19972   0    <NA> 
##  9         838        0      3 "Siro…     1 28        0     0 392092  8.05 <NA> 
## 10          98        1      1 "Gree…     1 23        0     1 PC 17… 63.4  D10 …
## # … with 490 more rows, and 2 more variables: Embarked <chr>, predictions <dbl>
# Actual survival outcomes for the training rows.
observed <- train[["Survived"]]

# Cross-tabulate observed outcomes (rows) against predicted outcomes (columns).
outcome.table <- table(observed, train[["predictions"]])
print(outcome.table)
##         
## observed   0   1
##        0 268  43
##        1  71 118