> library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
> library(readr)
> IR <- read_csv("Data/Infection_Risk(1).csv")
Parsed with column specification:
cols(
  ID = col_double(),
  Stay = col_double(),
  Age = col_double(),
  InfctRsk = col_double(),
  Cultures = col_double(),
  Xrays = col_double(),
  Beds = col_double(),
  MedSchl = col_double(),
  Region = col_double(),
  Census = col_double(),
  Nurses = col_double(),
  Services = col_double()
)
> View(IR)

Question 1.

> # Location and spread of each variable: quartiles, mean, and standard deviation.
> with(IR, summary(InfctRsk))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.300   3.700   4.400   4.355   5.200   7.800 
> with(IR, sd(InfctRsk))
[1] 1.340908
> with(IR, summary(Nurses))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   14.0    66.0   132.0   173.2   218.0   656.0 
> with(IR, sd(Nurses))
[1] 139.2654

Question 2.

> # Label each boxplot explicitly; without a title or axis label the two
> # plots cannot be told apart once they are exported.
> boxplot(IR$InfctRsk, main = "Boxplot of InfctRsk", ylab = "InfctRsk")

> boxplot(IR$Nurses, main = "Boxplot of Nurses", ylab = "Nurses")

Question 3.

> # Give each histogram a readable title and x-axis label instead of the
> # default, which is the raw expression passed to hist() (e.g. "IR$InfctRsk").
> hist(IR$InfctRsk, main = "Histogram of InfctRsk", xlab = "InfctRsk")

> hist(IR$Nurses, main = "Histogram of Nurses", xlab = "Nurses")

> # Looking at the boxplots, Infection Risk is closer to normally distributed than Nurses. Comparing each variable's median and mean supports this: the median of Infection Risk (4.4) is much closer to its mean (4.355) than the median of Nurses (132) is to its mean (173.2). The boxplots and histograms further confirm this by showing a much higher level of skewness for Nurses.

Question 4.

> # Mean of InfctRsk over the rows with Age above 60, then over the rows with Age below 50.
> with(IR, mean(InfctRsk[Age > 60]))
[1] 4.766667
> with(IR, mean(InfctRsk[Age < 50]))
[1] 4.428571

Question 5.

> # Region is read as a number; convert it to a factor so it acts as a grouping
> # variable, then average each variable within every region level.
> IR$Region <- factor(IR$Region)
> with(IR, tapply(InfctRsk, Region, mean))
       1        2        3        4 
4.860714 4.393750 3.927027 4.381250 
> with(IR, tapply(Nurses, Region, mean))
       1        2        3        4 
190.6071 185.5000 160.5946 147.6250 

Question 6.

> # 95% confidence interval for the mean of InfctRsk, using the large-sample
> # normal approximation: mean +/- z * s / sqrt(n).
> # sd() already divides by n - 1, so the standard error of the mean is
> # s / sqrt(n); dividing by sqrt(n - 1) would apply the correction twice.
> a <- mean(IR$InfctRsk)
> s <- sd(IR$InfctRsk)
> n <- nrow(IR)
> error <- qnorm(0.975) * s / sqrt(n)
> left <- a - error
> right <- a + error
> round(c(left, right), 3)
[1] 4.108 4.602
> # Same interval for the mean of Nurses.
> a <- mean(IR$Nurses)
> s <- sd(IR$Nurses)
> n <- nrow(IR)
> error <- qnorm(0.975) * s / sqrt(n)
> left <- a - error
> right <- a + error
> round(c(left, right), 3)
[1] 147.570 198.925

Question 7.

> # I am 95% confident that the population mean of Infection Risk is between 4.1 and 4.6.
> 
> # I am 95% confident that the population mean of Nurses is between 147.5 and 199.