#Confidence Intervals solved
#############Confidence Intervals for One Sample: Continuous Outcome___1
### 1. One-sample mean, large sample (n > 30): use the standard normal (Z) quantile
x_mean <- 28.5  # sample mean
n <- 3326       # sample size
S_sd <- 5.32    # sample standard deviation
ci <- .90       # confidence level

# Two-sided interval: the critical value is the quantile at ci + (1 - ci) / 2.
z_crit <- qnorm(ci + (1 - ci) / 2)
# Signed margins of error (lower, upper) around the sample mean.
margins <- c(-1, 1) * z_crit * S_sd / sqrt(n)
ci_90 <- x_mean + margins
ci_90
## [1] 28.34827 28.65173
# 90% confidence interval is (28.3482678, 28.6517322)

### 2. One-sample mean, small sample (n < 30): use the t quantile with n - 1 degrees of freedom
x_mean <- 27.26  # sample mean
n <- 10          # sample size
df <- n - 1      # degrees of freedom
S_sd <- 3.1      # sample standard deviation
ci <- .90        # confidence level

# Two-sided interval: the critical value is the t quantile at ci + (1 - ci) / 2.
t_crit <- qt(ci + (1 - ci) / 2, df = df)
# Signed margins of error (lower, upper) around the sample mean.
margins <- c(-1, 1) * t_crit * S_sd / sqrt(n)
ci_90 <- x_mean + margins
ci_90
## [1] 25.46299 29.05701
# 90% confidence interval is (25.4629883, 29.0570117)
################## A much easier way: method 1
# Fit an intercept-only linear model; its single coefficient is the mean of mpg.
l.model <- lm(mpg ~ 1, mtcars)

# confint() reports the t-based confidence interval for that coefficient.
confint(l.model, level = 0.95)
##                2.5 %   97.5 %
## (Intercept) 17.91768 22.26357
### Method 2: the same interval by hand, using the t quantile with n - 1 degrees of freedom
x_mean <- mean(mtcars$mpg)
n <- length(mtcars$mpg)
df <- n - 1
S_sd <- sd(mtcars$mpg)
ci <- .95
t_crit <- qt(ci + (1 - ci) / 2, df = df)
# NOTE: the variable is called ci_90, but with ci = .95 it holds the 95% interval.
ci_90 <- x_mean + c(-1, 1) * t_crit * S_sd / sqrt(n)
ci_90
## [1] 17.91768 22.26357
#######################################################


############# Confidence Interval for the Population Proportion ___2
# Estimate the prevalence of cardiovascular disease (CVD) in men with a 95%
# confidence interval, using the normal approximation:
#   p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n)
x <- 244   # number of men found with CVD
n <- 1792  # total number of men, with or without CVD
p_hat <- x / n
SE <- sqrt(p_hat * (1 - p_hat) / n)  # standard error of the sample proportion
ci <- .95
# The critical value is a quantile, so it must come from qnorm(). pnorm() is
# the distribution function and would return a probability (about 0.835)
# instead of z = 1.96, making the interval far too narrow.
ci_95 <- p_hat + c(-1, 1) * qnorm(ci + (1 - ci) / 2) * SE
ci_95
## [1] 0.1202818 0.1520397
# 95% confidence interval is (0.1202818, 0.1520397)
# With 95% confidence, the prevalence of cardiovascular disease in men is between 12.0% and 15.2%.

################# Confidence Interval for Two Independent Samples, Continuous Outcome ____3
# Whether Z or t is used again depends on whether the sample sizes are large
# (n1 > 30 and n2 > 30) or small. Both samples are small here, so t is used.
n_1 <- 6
n_2 <- 4
df <- n_1 + n_2 - 2  # degrees of freedom for the pooled estimate
S_1_sd <- 9.7
S_2_sd <- 12
x_1_mean <- 117.5
x_2_mean <- 126.8

# Pooled standard deviation: each sample variance is weighted by its own
# degrees of freedom (n - 1).
pooled_ss <- (n_1 - 1) * S_1_sd^2 + (n_2 - 1) * S_2_sd^2
S_p <- sqrt(pooled_ss / df)
print(S_p)
## [1] 10.62103
ci <- .95
t_crit <- qt(ci + (1 - ci) / 2, df = df)
# Signed margins of error (lower, upper) around the difference in means.
margins <- c(-1, 1) * t_crit * S_p * sqrt(1 / n_1 + 1 / n_2)
ci_95 <- (x_1_mean - x_2_mean) + margins
ci_95
## [1] -25.109606   6.509606
# 95% confidence interval for the difference is (-25.1096058, 6.5096058)

###### Confidence Intervals for Matched Samples, Continuous Outcome ___4
# If n > 30, use the Z table for the standard normal distribution.
# If n < 30, use the t table with df = n - 1.
data_1 <- c(168, 111, 139, 127, 155, 115, 125, 123, 130, 137, 130, 129, 112, 141, 122)
data_2 <- c(141, 119, 122, 127, 125, 123, 113, 106, 131, 142, 131, 135, 119, 130, 121)
length(data_1); length(data_2)
## [1] 15
## [1] 15
# Paired differences: second measurement minus first.
diff <- data_2 - data_1
diff
##  [1] -27   8 -17   0 -30   8 -12 -17   1   5   1   6   7 -11  -1
diff_mean <- mean(diff)
diff_mean #[1] -5.266667
## [1] -5.266667
diff_mean_sd <- sd(diff)
diff_mean_sd #[1] 12.80885
## [1] 12.80885
diff_sum <- sum(diff)
diff_sum #-79
## [1] -79
#################### Hand calculation of the standard deviation
# Deviations from the mean are rounded to one decimal place before squaring.
# Because of that rounding they sum to 0.5 rather than 0, and s_d below
# (12.8089) differs slightly from sd(diff) (12.80885).
diff_mean_diff <- round(diff - diff_mean, 1)
sum(diff_mean_diff)
## [1] 0.5
diff_mean_diff_s <- diff_mean_diff^2
diff_mean_diff_s
##  [1] 470.89 176.89 136.89  28.09 610.09 176.89  44.89 136.89  39.69 106.09
## [11]  39.69 127.69 151.29  32.49  18.49
sum(diff_mean_diff_s)
## [1] 2296.95
################################
n_pairs <- length(data_1)  # number of matched pairs
x_d <- diff_sum / n_pairs  # mean difference
x_d #[1] -5.266667
## [1] -5.266667
s_d <- sqrt(sum(diff_mean_diff_s) / (n_pairs - 1))  # sample SD of the differences
s_d #[1] 12.8089
## [1] 12.8089
df <- n_pairs - 1
ci <- .95
# Use the computed df rather than a hard-coded 14 so the interval follows the data.
x_d + c(-1, 1) * qt(ci + (1 - ci) / 2, df = df) * s_d / sqrt(n_pairs)
## [1] -12.359998   1.826664
# So, the 95% confidence interval for the difference is (-12.4, 1.8)
# Cross-check with the built-in paired t-test. Its interval differs in the
# fifth decimal place because it does not round the deviations.
#?t.test
t.test(data_2, data_1, paired = TRUE)
## 
##  Paired t-test
## 
## data:  data_2 and data_1
## t = -1.5925, df = 14, p-value = 0.1336
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12.359972   1.826639
## sample estimates:
## mean of the differences 
##               -5.266667
################### Confidence Interval for Two Independent Samples, Dichotomous Outcome ___5
### 1. Confidence Interval for a Risk Difference or Prevalence Difference
# Compute the 95% confidence interval for the difference in proportions of
# patients reporting relief (in this case a risk difference, since it is a
# difference in cumulative incidence).
data_1 <- data.frame(reduced_number = c(23, 11), total_number = c(50, 50))
rownames(data_1) <- c("New pain reliever", "Standard pain reliever")
data_1$proportion <- data_1$reduced_number / data_1$total_number
data_1
##                        reduced_number total_number proportion
## New pain reliever                  23           50       0.46
## Standard pain reliever             11           50       0.22
# Take the proportions and group sizes from the table instead of retyping
# them, so the interval stays consistent if the counts change.
p1 <- data_1["New pain reliever", "proportion"]
p2 <- data_1["Standard pain reliever", "proportion"]
n1 <- data_1["New pain reliever", "total_number"]
n2 <- data_1["Standard pain reliever", "total_number"]
ci <- 0.95
# Standard error of the difference between two independent proportions.
se_diff <- sqrt(p1 * (1 - p1) / n1 + p2 * (1 - p2) / n2)
ci_95 <- (p1 - p2) + c(-1, 1) * qnorm(ci + (1 - ci) / 2) * se_diff
ci_95
## [1] 0.06036633 0.41963367
# 95% confidence interval is (0.0603663, 0.4196337).
# Interpretation: our best estimate is an increase of 24 percentage points in
# pain relief with the new treatment, and with 95% confidence the risk
# difference is between 6% and 42%.
# Since the 95% confidence interval does not contain the null value of 0, we
# can conclude that there is a statistically significant improvement with the
# new treatment.


#ref https://rpubs.com/A000ANB/653745
#https://rpubs.com/A000ANB/653745
#https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/BS704_Confidence_Intervals6.html