> library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
> library(readr)
> # Import the infection-risk CSV from the Data folder into IR.
> IR <- read_csv(file = "Data/Infection_Risk(1).csv")
Parsed with column specification:
cols(
ID = col_double(),
Stay = col_double(),
Age = col_double(),
InfctRsk = col_double(),
Cultures = col_double(),
Xrays = col_double(),
Beds = col_double(),
MedSchl = col_double(),
Region = col_double(),
Census = col_double(),
Nurses = col_double(),
Services = col_double()
)
> # Open the imported table in the interactive data viewer.
> View(IR)
Question 1.
> # Summary statistics and sample standard deviation of InfctRsk.
> summary(IR[["InfctRsk"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.300 3.700 4.400 4.355 5.200 7.800
> sd(IR[["InfctRsk"]])
[1] 1.340908
> # Same statistics for Nurses.
> summary(IR[["Nurses"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 66.0 132.0 173.2 218.0 656.0
> sd(IR[["Nurses"]])
[1] 139.2654
Question 2.
> # Box plots of InfctRsk and Nurses, one figure per variable.
> boxplot(IR[["InfctRsk"]])

> boxplot(IR[["Nurses"]])

Question 3.
> # Histograms of InfctRsk and Nurses, one figure per variable, to inspect the
> # shape of each distribution. The calls are left exactly as written because
> # hist() derives its default title and x-axis label from the argument text.
> hist(IR$InfctRsk)

> hist(IR$Nurses)

> # Looking at the box plots and histograms, Infection Risk is closer to normally distributed than Nurses. Comparing the median and mean of each variable supports this: the median of Infection Risk (4.4) is close to its mean (4.355), whereas the median of Nurses (132) is well below its mean (173.2). The box plots and histograms further confirm this by displaying a higher level of skewness for Nurses.
Question 4.
> # Mean InfctRsk among rows with Age strictly above 60.
> with(IR, mean(InfctRsk[Age > 60]))
[1] 4.766667
> # Mean InfctRsk among rows with Age strictly below 50.
> with(IR, mean(InfctRsk[Age < 50]))
[1] 4.428571
Question 5.
> # Treat Region as a categorical variable so it can be used for grouping.
> IR[["Region"]] <- factor(IR[["Region"]])
> # Mean InfctRsk within each Region level.
> with(IR, tapply(InfctRsk, Region, mean))
1 2 3 4
4.860714 4.393750 3.927027 4.381250
> # Mean Nurses within each Region level.
> with(IR, tapply(Nurses, Region, mean))
1 2 3 4
190.6071 185.5000 160.5946 147.6250
Question 6.
> # 95% confidence interval for the mean of InfctRsk, using the large-sample
> # normal approximation (qnorm). sd() already uses the n - 1 denominator, so
> # the standard error of the mean is s / sqrt(n), not s / sqrt(n - 1).
> a <- mean(IR$InfctRsk)
> s <- sd(IR$InfctRsk)
> n <- nrow(IR)
> error <- qnorm(0.975) * s / sqrt(n)
> left <- a - error
> right <- a + error
> left
[1] 4.107633
> right
[1] 4.602101
> # Same interval for the mean of Nurses.
> a <- mean(IR$Nurses)
> s <- sd(IR$Nurses)
> n <- nrow(IR)
> error <- qnorm(0.975) * s / sqrt(n)
> left <- a - error
> right <- a + error
> left
[1] 147.5703
> right
[1] 198.9252
Question 7.
> # I am 95% confident that the population mean for Infection Risk is between 4.1 and 4.6.
>
> # I am 95% confident that the population mean for Nurses is between 147.6 and 198.9.