# Final Assignment (#6) Fall 2021
# Question 4 -- Skin Cancer and Table 1

# PART A
# The expected counts for two cells in the 4 x 2 table examining
# the association between the number of times during residency 
# observing skin cancer examination and residency program type are 
# less than five.  This explains why Fisher's exact test, which 
# gives the reported p-value of 0.91, was used instead of the 
# chi-squared test.

# Observed counts: a 4 x 2 matrix, entered one row at a time
obs.skin <- matrix(
  c(16, 47,
    11, 38,
     4, 10,
     3,  8),
  ncol = 2,
  byrow = TRUE
)
# show the table
obs.skin
##      [,1] [,2]
## [1,]   16   47
## [2,]   11   38
## [3,]    4   10
## [4,]    3    8
# run both tests on the table
# Pearson chi-squared test (R warns that the approximation may be incorrect)
chisq.test(obs.skin, correct = FALSE)
## Warning in chisq.test(obs.skin, correct = FALSE): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  obs.skin
## X-squared = 0.29993, df = 3, p-value = 0.96
# Fisher's exact test on the same table
fisher.test(obs.skin)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  obs.skin
## p-value = 0.9124
## alternative hypothesis: two.sided
# PART B
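# The p-value of 0.71 examining the association between having a 
# Dermatology elective in medical school or residency and residency 
# program type corresponds to the p-value calculated from 
# Fisher's exact test.
# NOTE: per the output below, 0.71 (0.7056) is the Pearson chi-squared 
# p-value without continuity correction; Fisher's exact test gives 
# 0.8345, so this statement does not hold.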

# Let's build the 2 x 2 table called derm, entered one row at a time
derm <- matrix(c(12, 35, 22, 75), nrow = 2, byrow = TRUE)

# show derm
derm
##      [,1] [,2]
## [1,]   12   35
## [2,]   22   75
# Pearson chi-squared test, continuity correction switched off
chisq.test(derm, correct = FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  derm
## X-squared = 0.14273, df = 1, p-value = 0.7056
# Fisher's exact test on the same table
fisher.test(derm)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  derm
## p-value = 0.8345
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.4703129 2.7996835
## sample estimates:
## odds ratio 
##   1.167537
# PART C
# If the 3 x 2 table examining the association between 
# self-assessed level of performing SCE and residency program 
# type was collapsed so that the response categories "Very and 
# Somewhat Unskilled" and "Neither Skilled nor Unskilled" were 
# combined, the resulting 2 x 2 table could be created and 
# displayed in R using the following syntax:  
# skilled <- rbind(c(12+39,16+54),c(6,9))
# skilled

# Build the 2 x 2 table from the same cell values as the quoted syntax
skilled <- matrix(c(12 + 39, 16 + 54, 6, 9), nrow = 2, byrow = TRUE)
# NOTE(review): the column totals here (57 and 79) differ from the group
# sizes 34 and 110 passed to prop.test() elsewhere in this file, whereas
# 12 + 16 + 6 = 34.  Check against Table 1 whether collapsing the two
# categories should instead give c(12 + 16, 39 + 54) -- TODO confirm.
skilled
##      [,1] [,2]
## [1,]   51   70
## [2,]    6    9
# PART D
# If we were to compare the proportion of Family Medicine 
# residents having a Dermatology elective in medical school 
# or residency with the proportion of Internal Medicine residents 
# having a Dermatology elective in medical school or residency using 
# a homogeneity of proportions test (without continuity correction), 
# the point estimate for the difference in proportions and corresponding 
# confidence interval for that difference would be 0.035 with 95% 
# confidence interval from -0.14 to 0.22.

# Two-sample test for equality of proportions, continuity correction off.
# x holds the counts with the characteristic, n the group sizes.
compare.elective <- prop.test(
  x = c(12, 35),
  n = c(34, 110),
  correct = FALSE
)
compare.elective
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  c(12, 35) out of c(34, 110)
## X-squared = 0.14273, df = 1, p-value = 0.7056
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.1479391  0.2174578
## sample estimates:
##    prop 1    prop 2 
## 0.3529412 0.3181818
# point estimate of the difference: prop 1 minus prop 2
with(compare.elective, estimate[1] - estimate[2])
##     prop 1 
## 0.03475936
# End of Program