The data describe a sample of 200 patients at an Intensive Care Unit (ICU).
# Read the ICU data set (Stat2Data/ICU.csv on the Rdatasets mirror).
# read.csv() is used because its defaults suit CSV files: only the double
# quote is a quoting character and '#' is not treated as a comment marker,
# so apostrophes or hashes inside a field cannot corrupt the parse.
theURL <- "https://vincentarelbundock.github.io/Rdatasets/csv/Stat2Data/ICU.csv"
ICU_patients_DF <- read.csv(file = theURL, header = TRUE)
dim(ICU_patients_DF)
## [1] 200 10
head(ICU_patients_DF)
## X ID Survive Age AgeGroup Sex Infection SysBP Pulse Emergency
## 1 1 4 0 87 3 1 1 80 96 1
## 2 2 8 1 27 1 1 1 142 88 1
## 3 3 12 1 59 2 0 0 112 80 1
## 4 4 14 1 77 3 0 0 100 70 0
## 5 5 27 0 76 3 1 1 128 90 1
## 6 6 28 1 54 2 0 1 142 103 1
tail(ICU_patients_DF)
## X ID Survive Age AgeGroup Sex Infection SysBP Pulse Emergency
## 195 195 915 1 67 2 0 0 152 78 0
## 196 196 921 0 50 2 1 0 256 64 1
## 197 197 923 1 20 1 0 0 104 83 1
## 198 198 924 1 73 3 1 0 162 100 1
## 199 199 925 1 59 2 0 0 100 88 1
## 200 200 929 1 42 1 0 0 122 84 1
names (ICU_patients_DF)
## [1] "X" "ID" "Survive" "Age" "AgeGroup" "Sex"
## [7] "Infection" "SysBP" "Pulse" "Emergency"
#is.character(ICU_patients_DF$AgeGroup)
#is.numeric(ICU_patients_DF$AgeGroup)
str(ICU_patients_DF) # examine data.frame structure
## 'data.frame': 200 obs. of 10 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ID : int 4 8 12 14 27 28 32 38 40 41 ...
## $ Survive : int 0 1 1 1 0 1 1 1 1 1 ...
## $ Age : int 87 27 59 77 76 54 87 69 63 30 ...
## $ AgeGroup : int 3 1 2 3 3 2 3 2 2 1 ...
## $ Sex : int 1 1 0 0 1 0 1 0 0 1 ...
## $ Infection: int 1 1 0 0 1 1 1 1 0 0 ...
## $ SysBP : int 80 142 112 100 128 142 110 110 104 144 ...
## $ Pulse : int 96 88 80 70 90 103 154 132 66 110 ...
## $ Emergency: int 1 1 1 0 1 1 1 1 0 1 ...
class(ICU_patients_DF)
## [1] "data.frame"
# load packages
library(plyr) # load this before dplyr
library(dplyr) # load plyr first then this pkg
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
# library(tidyverse) # includes stringr ?incl in other pkgs??
library(stringr) # for string manipulation
# Report the sample size. nrow() is base R and returns a plain integer, so the
# result does not depend on whether dplyr::count() or plyr::count() is the
# function currently visible on the search path.
cat("Number of ICU patients: ", nrow(ICU_patients_DF))
## Number of ICU patients: 200
# Overview by age group: average systolic blood pressure within each AgeGroup.
# Relies on dplyr's group_by()/summarise() (dplyr is attached after plyr above).
summarise(
  group_by(ICU_patients_DF, AgeGroup),
  AvgBloodPress = mean(SysBP)
)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## AgeGroup AvgBloodPress
## <int> <dbl>
## 1 1 130.
## 2 2 133.
## 3 3 134.
# Mean and median systolic blood pressure over all ICU patients
cat(
  "Mean Systolic BP of ICU patients: ",
  mean(ICU_patients_DF[["SysBP"]])
)
## Mean Systolic BP of ICU patients: 132.28
cat(
  "Median Systolic BP of ICU patients: ",
  median(ICU_patients_DF[["SysBP"]])
)
## Median Systolic BP of ICU patients: 130
Select the ICU patients with high systolic blood pressure (> 120 mmHg).
# Build the high-blood-pressure subset: rows with SysBP above 120, restricted
# to the columns Age through Pulse (X, ID, Survive and Emergency are dropped).
# which() discards rows whose SysBP is NA instead of returning all-NA rows.
HBP_patients_DF <- ICU_patients_DF[
  which(ICU_patients_DF$SysBP > 120),
  c("Age", "AgeGroup", "Sex", "Infection", "SysBP", "Pulse")
]
#HBP_patients_DF
head(HBP_patients_DF)
## Age AgeGroup Sex Infection SysBP Pulse
## 2 27 1 1 1 142 88
## 5 76 3 1 1 128 90
## 6 54 2 0 1 142 103
## 10 30 1 1 0 144 110
## 12 78 3 0 1 130 132
## 13 70 3 1 0 138 103
tail(HBP_patients_DF)
## Age AgeGroup Sex Infection SysBP Pulse
## 192 46 1 0 1 142 89
## 194 71 3 0 1 124 124
## 195 67 2 0 0 152 78
## 196 50 2 1 0 256 64
## 198 73 3 1 0 162 100
## 200 42 1 0 0 122 84
# Size of the high-blood-pressure subset. nrow() gives the row count directly,
# independent of which package's count() is visible on the search path.
cat(
  "Number of ICU patients with High Systolic Blood Pressure > 120 mmHg: ",
  nrow(HBP_patients_DF)
)
## Number of ICU patients with High Systolic Blood Pressure > 120 mmHg: 127
Notes:
# Number of high-BP patients and their share of all ICU patients.
# Row counts come from nrow() (plain integers), so the percentage is ordinary
# numeric arithmetic rather than arithmetic on count() data frames.
cat("Number of patients with High BP: ", nrow(HBP_patients_DF))
## Number of patients with High BP: 127
cat(
  "Percent of ICU patients with high Systolic Blood Pressure: ",
  nrow(HBP_patients_DF) / nrow(ICU_patients_DF) * 100
)
## Percent of ICU patients with high Systolic Blood Pressure: 63.5
# Mean and median of systolic blood pressure, then of age, for the
# high-blood-pressure subset
cat(
  "Mean Systolic Blood Pressure of patients with high BP: ",
  mean(HBP_patients_DF[["SysBP"]])
)
## Mean Systolic Blood Pressure of patients with high BP: 150.6063
cat(
  "Median Systolic Blood Pressure of patients with high BP: ",
  median(HBP_patients_DF[["SysBP"]])
)
## Median Systolic Blood Pressure of patients with high BP: 142
cat(
  "Mean Age of patients with high Systolic Blood Pressure: ",
  mean(HBP_patients_DF[["Age"]])
)
## Mean Age of patients with high Systolic Blood Pressure: 58.55118
cat(
  "Median Age of patients with high Systolic Blood Pressure: ",
  median(HBP_patients_DF[["Age"]])
)
## Median Age of patients with high Systolic Blood Pressure: 64
# Compare the high-BP subset with ALL ICU patients (the subset is part of that
# population), first on the mean and then on the median of SysBP.
# The final else branch reports a tie so the comparison never ends silently.
if (mean(HBP_patients_DF$SysBP) > mean(ICU_patients_DF$SysBP)) {
  print("There is a higher mean Systolic BP for high BP patients")
} else if (mean(HBP_patients_DF$SysBP) < mean(ICU_patients_DF$SysBP)) {
  print("There is a lower mean Systolic BP for high BP patients compared to all ICU patients")
} else {
  print("The mean Systolic BP for high BP patients equals that of all ICU patients")
}
## [1] "There is a higher mean Systolic BP for high BP patients"
if (median(HBP_patients_DF$SysBP) > median(ICU_patients_DF$SysBP)) {
  print("There is a higher median Systolic BP for high BP patients")
} else if (median(HBP_patients_DF$SysBP) < median(ICU_patients_DF$SysBP)) {
  print("There is a lower median Systolic BP for high BP patients compared to all ICU patients")
} else {
  print("The median Systolic BP for high BP patients equals that of all ICU patients")
}
## [1] "There is a higher median Systolic BP for high BP patients"
Notes:
# Display labels for the coded AgeGroup values, plus a fallback label for any
# code that is not recognised
age_young <- "Young (under 50)"
age_middle <- "Middle (50-69)"
age_old <- "Old (70+)"
age_unknown <- "Age Group Unknown"
HBP_patients_DF$AgeGroup
## [1] 1 3 2 1 3 3 2 1 2 2 2 2 2 2 3 2 1 1 3 2 2 2 3 2 2 3 3 3 3 2 3 2 2 3 2 1 1
## [38] 2 1 2 1 2 2 2 3 3 2 2 1 3 3 3 2 3 3 2 1 1 2 1 1 3 1 2 2 3 3 2 2 2 2 3 2 1
## [75] 1 3 3 3 3 1 1 3 1 1 3 3 3 3 3 2 1 2 1 3 3 1 3 1 3 1 3 2 2 3 2 2 1 3 1 2 1
## [112] 3 1 1 1 1 1 2 2 3 3 1 3 2 2 3 1
is.numeric(HBP_patients_DF$AgeGroup)
## [1] TRUE
class(HBP_patients_DF)
## [1] "data.frame"
class(HBP_patients_DF$AgeGroup)
## [1] "integer"
length(HBP_patients_DF$AgeGroup)
## [1] 127
#print (HBP_patients_DF$AgeGroup)
vec <- HBP_patients_DF$AgeGroup
#vec
#class(vec)
#print ("start")
# Translate coded age groups into descriptive labels.
#
# vec:     vector of age-group codes (e.g. the integer AgeGroup column).
#          Codes are matched by their character form, so 1, 1L and "1" all
#          select the same label.
# labels:  named character vector mapping code -> label. Defaults to the
#          script-level constants age_young / age_middle / age_old for the
#          codes "1" / "2" / "3"; the default is evaluated only when the
#          argument is not supplied.
# unknown: label given to every code that has no entry in `labels`,
#          including NA. Defaults to the script-level constant age_unknown.
#
# Returns a character vector of the same length as `vec` (character(0) for
# empty input), carrying over any names `vec` had.
ChgAgeGroup <- function(
    vec,
    labels = c("1" = age_young, "2" = age_middle, "3" = age_old),
    unknown = age_unknown) {
  codes <- as.character(vec)
  # Vectorised lookup by name; codes without an entry (and NA) come back as NA
  relabelled <- unname(labels[codes])
  relabelled[is.na(relabelled)] <- unknown
  names(relabelled) <- names(vec)
  relabelled
}
# Replace the AgeGroup codes captured in `vec` with their descriptive labels
HBP_patients_DF[["AgeGroup"]] <- ChgAgeGroup(vec)
head(HBP_patients_DF)
## Age AgeGroup Sex Infection SysBP Pulse
## 2 27 Young (under 50) 1 1 142 88
## 5 76 Old (70+) 1 1 128 90
## 6 54 Middle (50-69) 0 1 142 103
## 10 30 Young (under 50) 1 0 144 110
## 12 78 Old (70+) 0 1 130 132
## 13 70 Old (70+) 1 0 138 103
tail(HBP_patients_DF)
## Age AgeGroup Sex Infection SysBP Pulse
## 192 46 Young (under 50) 0 1 142 89
## 194 71 Old (70+) 0 1 124 124
## 195 67 Middle (50-69) 0 0 152 78
## 196 50 Middle (50-69) 1 0 256 64
## 198 73 Old (70+) 1 0 162 100
## 200 42 Young (under 50) 0 0 122 84
# Give the six columns presentation-friendly headings (same order as before:
# Age, AgeGroup, Sex, Infection, SysBP, Pulse)
names(HBP_patients_DF) <- c(
  "Pt Age",
  "AgeGroup",
  "Gender",
  "Infection Pt",
  "Systolic BP",
  "HR"
)
head(HBP_patients_DF)
## Pt Age AgeGroup Gender Infection Pt Systolic BP HR
## 2 27 Young (under 50) 1 1 142 88
## 5 76 Old (70+) 1 1 128 90
## 6 54 Middle (50-69) 0 1 142 103
## 10 30 Young (under 50) 1 0 144 110
## 12 78 Old (70+) 0 1 130 132
## 13 70 Old (70+) 1 0 138 103
Looking forward to your feedback! Thank you, Rick