library(readr)
# Load the data set from the working directory. readr guesses the column
# types and reports them in the message recorded below.
Infection_Risk <- read_csv("Infection_Risk.csv")
## Parsed with column specification:
## cols(
## ID = col_double(),
## Stay = col_double(),
## Age = col_double(),
## InfctRsk = col_double(),
## Cultures = col_double(),
## Xrays = col_double(),
## Beds = col_double(),
## MedSchl = col_double(),
## Region = col_double(),
## Census = col_double(),
## Nurses = col_double(),
## Services = col_double()
## )
# View() opens a data-viewer window, so only call it when someone is at the
# console; in batch runs (Rscript, knitting) it is skipped instead of failing
# or popping up a window nobody sees.
if (interactive()) {
  View(Infection_Risk)
}
# Print the structure (dimensions, column names and types) of the data.
str(Infection_Risk)
## tibble [113 x 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ID : num [1:113] 1 2 3 4 5 6 7 8 9 10 ...
## $ Stay : num [1:113] 7.13 8.82 8.34 8.95 11.2 ...
## $ Age : num [1:113] 55.7 58.2 56.9 53.7 56.5 ...
## $ InfctRsk: num [1:113] 4.1 1.6 2.7 5.6 5.7 ...
## $ Cultures: num [1:113] 9 3.8 8.1 18.9 34.5 ...
## $ Xrays : num [1:113] 39.6 51.7 74 122.8 88.9 ...
## $ Beds : num [1:113] 279 80 107 147 180 150 186 640 182 85 ...
## $ MedSchl : num [1:113] 2 2 2 2 2 2 2 1 2 2 ...
## $ Region : num [1:113] 4 2 3 4 1 2 3 2 3 1 ...
## $ Census : num [1:113] 207 51 82 53 134 147 151 399 130 59 ...
## $ Nurses : num [1:113] 241 52 54 148 151 106 129 360 118 66 ...
## $ Services: num [1:113] 60 40 20 40 40 40 40 60 40 40 ...
## - attr(*, "spec")=
## .. cols(
## .. ID = col_double(),
## .. Stay = col_double(),
## .. Age = col_double(),
## .. InfctRsk = col_double(),
## .. Cultures = col_double(),
## .. Xrays = col_double(),
## .. Beds = col_double(),
## .. MedSchl = col_double(),
## .. Region = col_double(),
## .. Census = col_double(),
## .. Nurses = col_double(),
## .. Services = col_double()
## .. )
# Descriptive statistics for the InfctRsk and Nurses columns. Each call is
# auto-printed; the recorded result follows it as a `## ` line.

# Means
mean(Infection_Risk[["InfctRsk"]])
## [1] 4.354867
mean(Infection_Risk[["Nurses"]])
## [1] 173.2478

# Maxima
max(Infection_Risk[["InfctRsk"]])
## [1] 7.8
max(Infection_Risk[["Nurses"]])
## [1] 656

# Minima
min(Infection_Risk[["InfctRsk"]])
## [1] 1.3
min(Infection_Risk[["Nurses"]])
## [1] 14

# Sample standard deviations
sd(Infection_Risk[["InfctRsk"]])
## [1] 1.340908
sd(Infection_Risk[["Nurses"]])
## [1] 139.2654
# Boxplots of the two variables. Titles and axis labels are set explicitly so
# the two consecutive plots can be told apart in the rendered report.
boxplot(
  Infection_Risk$InfctRsk,
  main = "Infection risk",
  ylab = "InfctRsk"
)
boxplot(
  Infection_Risk$Nurses,
  main = "Number of nurses",
  ylab = "Nurses"
)
## Question 3
## The distribution of infection risk is much more even than the distribution of the number of nurses in the hospital. Infection-risk values fall on both sides of the mean, whereas the number of nurses has a skewed distribution, with some outliers that fall far from the mean.
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean and standard deviation of InfctRsk for the rows with Age above 60.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change `na.rm`.
Infection_Risk %>%
  filter(Age > 60) %>%
  summarize(
    mean = mean(InfctRsk, na.rm = TRUE),
    sd = sd(InfctRsk, na.rm = TRUE)
  )
## # A tibble: 1 x 2
## mean sd
## <dbl> <dbl>
## 1 4.77 2.34
# Mean and standard deviation of InfctRsk for the rows with Age below 50.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change `na.rm`.
Infection_Risk %>%
  filter(Age < 50) %>%
  summarize(
    mean = mean(InfctRsk, na.rm = TRUE),
    sd = sd(InfctRsk, na.rm = TRUE)
  )
## # A tibble: 1 x 2
## mean sd
## <dbl> <dbl>
## 1 4.43 1.10
# Turn the numeric Region codes 1-4 into a labelled factor; any code outside
# 1-4 would become NA.
Infection_Risk$Region <- factor(
  x = Infection_Risk[["Region"]],
  levels = c(1, 2, 3, 4),
  labels = c("Northeastern", "Northcentral", "South", "West")
)
# Number of rows in each region level.
summary(Infection_Risk[["Region"]])
## Northeastern Northcentral South West
## 28 32 37 16
library(readr)
library(dplyr)
# Mean and standard deviation of InfctRsk within each Region.
# The recorded minimum of InfctRsk is 1.3, so the `>= 1` filter kept every
# row in the recorded run; filter() also drops rows where the value is NA.
# `TRUE` is spelled out because `T` can be reassigned.
Infection_Risk %>%
  filter(InfctRsk >= 1) %>%
  group_by(Region) %>%
  summarise(
    mean = mean(InfctRsk, na.rm = TRUE),
    sd = sd(InfctRsk, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 3
## Region mean sd
## <fct> <dbl> <dbl>
## 1 Northeastern 4.86 1.27
## 2 Northcentral 4.39 1.34
## 3 South 3.93 1.46
## 4 West 4.38 0.877
# Mean and standard deviation of Nurses within each Region.
# The recorded minimum of Nurses is 14, so the `>= 1` filter kept every row
# in the recorded run; filter() also drops rows where the value is NA.
# `TRUE` is spelled out because `T` can be reassigned.
Infection_Risk %>%
  filter(Nurses >= 1) %>%
  group_by(Region) %>%
  summarise(
    mean = mean(Nurses, na.rm = TRUE),
    sd = sd(Nurses, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 3
## Region mean sd
## <fct> <dbl> <dbl>
## 1 Northeastern 191. 146.
## 2 Northcentral 186. 134.
## 3 South 161. 136.
## 4 West 148. 152.
# 95% confidence intervals for the mean of InfctRsk and the mean of Nurses,
# built as mean -/+ z * sd / sqrt(n) with z = qnorm(0.975).
# NOTE(review): the critical value comes from the normal distribution even
# though the standard deviation is estimated from the sample; a t-based
# interval would use qt(0.975, n - 1) instead. Confirm which one is intended.

# ---- Infection risk ----
MeanInfctRsk <- mean(Infection_Risk[["InfctRsk"]])
SDInfctRsk <- sd(Infection_Risk[["InfctRsk"]])
IFRecords <- length(Infection_Risk[["InfctRsk"]])  # number of records
ErrorInfctRsk <- qnorm(0.975) * SDInfctRsk / sqrt(IFRecords)  # margin of error
LeftInfctRsk <- MeanInfctRsk - ErrorInfctRsk  # lower bound
RightInfctRsk <- MeanInfctRsk + ErrorInfctRsk  # upper bound

# ---- Nurses ----
MeanNurses <- mean(Infection_Risk[["Nurses"]])
SDNurses <- sd(Infection_Risk[["Nurses"]])
NRecords <- length(Infection_Risk[["Nurses"]])  # number of records
ErrorNurses <- qnorm(0.975) * SDNurses / sqrt(NRecords)  # margin of error
LeftNurses <- MeanNurses - ErrorNurses  # lower bound
RightNurses <- MeanNurses + ErrorNurses  # upper bound
# Show the confidence-interval bounds, one per line: infection risk (lower,
# upper), then nurses (lower, upper).
for (bound in c(LeftInfctRsk, RightInfctRsk, LeftNurses, RightNurses)) {
  print(bound)
}
## [1] 4.107633
## [1] 4.602101
## [1] 147.5703
## [1] 198.9252
## With 95% confidence, we can say that the mean infection risk falls between 4.11 and 4.60. Likewise, with 95% confidence, the average number of nurses per hospital falls between 147.57 and 198.93.