library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(ipumsr)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Load the infection-risk data set and produce descriptive statistics and
# boxplots for the two variables analysed here (InfctRsk and Nurses).
# NOTE(review): the path below is absolute and user-specific, so the script
# only runs where the file sits at exactly this location -- consider a
# project-relative path.
mydata <- read_csv("C:/Users/chris/Downloads/Infection_Risk(1).csv")
## Parsed with column specification:
## cols(
## ID = col_double(),
## Stay = col_double(),
## Age = col_double(),
## InfctRsk = col_double(),
## Cultures = col_double(),
## Xrays = col_double(),
## Beds = col_double(),
## MedSchl = col_double(),
## Region = col_double(),
## Census = col_double(),
## Nurses = col_double(),
## Services = col_double()
## )
# View() opens a data viewer window; only do that when a person is at the
# console, so the script can also be run non-interactively.
if (interactive()) {
  View(mydata)
}

# Keep only the two columns used in this part of the analysis.
assignment2 <- subset(mydata, select = c("InfctRsk", "Nurses"))
if (interactive()) {
  View(assignment2)
}

# Descriptive statistics (psych::describe) for each variable.
describe(assignment2$InfctRsk)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 113 4.35 1.34 4.4 4.38 1.19 1.3 7.8 6.5 -0.12 0.07 0.13
describe(assignment2$Nurses)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 113 173.25 139.27 132 151.96 103.78 14 656 642 1.34 1.35
## se
## X1 13.1

# One boxplot per variable to inspect spread, symmetry and outliers.
boxplot(assignment2$InfctRsk, main = "Boxplot for InfctRsk")
boxplot(assignment2$Nurses, main = "Boxplot for Nurses")
The boxplot for InfctRsk (infection risk at the hospital) has outliers that are tightly clustered, and it displays the minimum, the first quartile (Q1), the median, the third quartile (Q3), and the maximum. Like the boxplot for InfctRsk, the boxplot for Nurses also has outliers; however, I can observe that the Nurses boxplot shows greater variability as well as larger outliers. Therefore, I can conclude that their medians do differ. The median divides the observations in half: half of the values are greater than or equal to it and half are less. The first quartile (Q1) separates the lowest 25% of the data from the rest, while the third quartile (Q3) separates the highest 25% of the observations from the rest. The maximum value represents the upper limit of the observations that are not outliers. Likewise, for InfctRsk the minimum value is the lower limit of the observations that are not outliers.
Neither variable appears to be normally distributed. Although the boxplot for InfctRsk has a median in the middle of the box, the whiskers are not about the same length on both sides of the box; as a result, the distribution is not symmetric. The boxplot for Nurses, on the other hand, shows a positively skewed distribution because the median is closer to the bottom of the box.
# Compare the mean infection risk between rows with Age above 60 and rows
# with Age below 50.
# NOTE(review): the object names say "parent", but Age is presumably a
# patient-age column -- names are kept unchanged in case other code uses them.
parentAge <- subset(mydata, select = c("InfctRsk", "Age"))

# subset() has no `na.rm` argument: the original `na.rm = T` was silently
# absorbed by `...` and had no effect. subset() already treats a missing
# condition as FALSE, so rows with NA in Age are dropped without it.
parentsover60 <- subset(parentAge, Age > 60)
if (interactive()) {
  View(parentsover60)
}
mean(parentsover60$InfctRsk)
## [1] 4.766667

parentsbelow50 <- subset(parentAge, Age < 50)
if (interactive()) {
  View(parentsbelow50)
}
mean(parentsbelow50$InfctRsk)
## [1] 4.428571
# Mean infection risk and mean number of nurses for each Region code.
wadi <- subset(mydata, select = c("InfctRsk", "Region", "Nurses"))
if (interactive()) {
  View(wadi)
}

# Plain data.frame copy of the three selected columns. The object shares its
# name with the Region column; inside group_by()/summarize() the bare name
# `Region` refers to the column of the piped data.
Region <- data.frame(wadi)

# Both summaries use na.rm = TRUE so that missing values are handled the same
# way for the two variables (the original only did so for Nurses).
Region %>%
  group_by(Region) %>%
  summarize(mean = mean(InfctRsk, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 2
## Region mean
## <dbl> <dbl>
## 1 1 4.86
## 2 2 4.39
## 3 3 3.93
## 4 4 4.38
Region %>%
  group_by(Region) %>%
  summarize(mean = mean(Nurses, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 2
## Region mean
## <dbl> <dbl>
## 1 1 191.
## 2 2 186.
## 3 3 161.
## 4 4 148.
## CI for InfctRsk
# 95% normal-approximation interval for the mean: mean +/- z * sd / sqrt(n).
# The inputs are the rounded mean, sd and n printed by describe() for
# InfctRsk.
a <- 4.35 # sample mean
s <- 1.34 # sample standard deviation
n <- 113 # number of observations

# Margin of error: 97.5% standard-normal quantile times the standard error.
error <- qnorm(0.975) * s / sqrt(n)

# Build both bounds in one vectorised step, then expose them by name.
ci_infctrsk <- a + c(-1, 1) * error
left <- ci_infctrsk[1]
right <- ci_infctrsk[2]
print(ci_infctrsk)
## [1] 4.102934 4.597066
## CI for Nurses
# 95% normal-approximation interval for the mean: mean +/- z * sd / sqrt(n).
# The inputs are the rounded mean, sd and n printed by describe() for Nurses.
a <- 173.25 # sample mean
s <- 139.27 # sample standard deviation
n <- 113 # number of observations

# Margin of error: 97.5% standard-normal quantile times the standard error.
error <- qnorm(0.975) * s / sqrt(n)

# Build both bounds in one vectorised step, then expose them by name.
ci_nurses <- a + c(-1, 1) * error
leftn <- ci_nurses[1]
rightn <- ci_nurses[2]
print(ci_nurses)
## [1] 147.5717 198.9283