One of the challenges in working with data is wrangling. In this assignment we will use R to perform this task.
# Load the Channing data from the local CSV file.
# header = TRUE: the first line of the file supplies the column names.
# NOTE(review): this is an absolute, machine-specific path; a channing.csv is
# also read from GitHub at the end of this script - confirm whether that copy
# could be used here instead.
channing <- read.csv(
  "C:\\Users\\Home\\channing.csv",
  header = TRUE,
  sep = ","
)
summary(channing) # per-column overview of the data
## X sex entry exit
## Min. : 1.0 Length:462 Min. : 733.0 Min. : 777
## 1st Qu.:116.2 Class :character 1st Qu.: 854.0 1st Qu.: 939
## Median :231.5 Mode :character Median : 900.5 Median : 990
## Mean :231.5 Mean : 905.9 Mean : 986
## 3rd Qu.:346.8 3rd Qu.: 956.0 3rd Qu.:1031
## Max. :462.0 Max. :1140.0 Max. :1207
## time cens
## Min. : 0.00 Min. :0.000
## 1st Qu.: 35.00 1st Qu.:0.000
## Median : 82.00 Median :0.000
## Mean : 80.34 Mean :0.381
## 3rd Qu.:137.00 3rd Qu.:1.000
## Max. :137.00 Max. :1.000
# Mean and median age at entry (in months) across all residents;
# missing values are dropped before aggregating.
entry_mean <- mean(channing[["entry"]], na.rm = TRUE)
entry_median <- median(channing[["entry"]], na.rm = TRUE)
head(channing) # first rows of the full data set
## X sex entry exit time cens
## 1 1 Male 782 909 127 1
## 2 2 Male 1020 1128 108 1
## 3 3 Male 856 969 113 1
## 4 4 Male 915 957 42 1
## 5 5 Male 863 983 120 1
## 6 6 Male 906 1012 106 1
# Keep the female residents who died while in Channing (rows where cens is 1)
# and store them in their own data frame.
# cens is summarised as a numeric column above, so it is compared with the
# number 1; comparing with the string '1' only matches through implicit
# coercion of the whole column to character.
channing_female_deaths <- subset(channing, sex == "Female" & cens == 1)
head(channing_female_deaths) # glimpse of the filtered rows
## X sex entry exit time cens
## 98 98 Female 1042 1172 130 1
## 99 99 Female 921 1040 119 1
## 100 100 Female 885 1003 118 1
## 101 101 Female 901 1018 117 1
## 102 102 Female 808 932 124 1
## 103 103 Female 915 1004 89 1
# Per-column overview of the filtered (female, cens == 1) rows.
summary(channing_female_deaths)
## X sex entry exit
## Min. : 98.0 Length:130 Min. : 748.0 Min. : 804.0
## 1st Qu.:130.2 Class :character 1st Qu.: 878.5 1st Qu.: 945.0
## Median :383.5 Mode :character Median : 930.5 Median :1000.5
## Mean :285.3 Mean : 930.0 Mean : 999.4
## 3rd Qu.:420.8 3rd Qu.: 976.0 3rd Qu.:1041.0
## Max. :462.0 Max. :1140.0 Max. :1200.0
## time cens
## Min. : 2.00 Min. :1
## 1st Qu.: 39.00 1st Qu.:1
## Median : 69.50 Median :1
## Mean : 70.15 Mean :1
## 3rd Qu.:107.75 3rd Qu.:1
## Max. :136.00 Max. :1
# Give the measurement columns descriptive names.
# The replacement vector maps old name -> new name.
channing_female_deaths_mod <- plyr::rename(
  x = channing_female_deaths,
  replace = c(
    "entry" = "Age_at_entry",
    "exit" = "Age_at_death",
    "time" = "Length_of_Stay",
    "cens" = "Alive_YN"
  )
)
head(channing_female_deaths_mod) # check the new column headers
## X sex Age_at_entry Age_at_death Length_of_Stay Alive_YN
## 98 98 Female 1042 1172 130 1
## 99 99 Female 921 1040 119 1
## 100 100 Female 885 1003 118 1
## 101 101 Female 901 1018 117 1
## 102 102 Female 808 932 124 1
## 103 103 Female 915 1004 89 1
# Per-column overview of the renamed data frame.
summary(channing_female_deaths_mod)
## X sex Age_at_entry Age_at_death
## Min. : 98.0 Length:130 Min. : 748.0 Min. : 804.0
## 1st Qu.:130.2 Class :character 1st Qu.: 878.5 1st Qu.: 945.0
## Median :383.5 Mode :character Median : 930.5 Median :1000.5
## Mean :285.3 Mean : 930.0 Mean : 999.4
## 3rd Qu.:420.8 3rd Qu.: 976.0 3rd Qu.:1041.0
## Max. :462.0 Max. :1140.0 Max. :1200.0
## Length_of_Stay Alive_YN
## Min. : 2.00 Min. :1
## 1st Qu.: 39.00 1st Qu.:1
## Median : 69.50 Median :1
## Mean : 70.15 Mean :1
## 3rd Qu.:107.75 3rd Qu.:1
## Max. :136.00 Max. :1
# Mean age at entry (in months) for the filtered female rows, reported next to
# the mean for all residents.
mean_entry_age_F <- mean(channing_female_deaths_mod$Age_at_entry, na.rm = TRUE)
# The format string is a constant and the value is passed as an argument, so
# the message text itself is never interpreted as a sprintf() format.
sprintf("The mean age at entry of Channing female residents is %s", mean_entry_age_F)
## [1] "The mean age at entry of Channing female residents is 930.046153846154"
sprintf("The mean of age at entry of all Channing residents is %s", entry_mean)
## [1] "The mean of age at entry of all Channing residents is 905.878787878788"
# NOTE(review): the figures in this sentence are typed in by hand, not derived
# from mean_entry_age_F / entry_mean, and the subset is females with cens == 1
# rather than all female residents - re-check the wording if the data changes.
sprintf("The age for female residents, 930.04 months or 77.5 years based on 130 observations is higher than that of all residents, 905.9 months or 75.49 years because the females had a longer life on average than the males.")
## [1] "The age for female residents, 930.04 months or 77.5 years based on 130 observations is higher than that of all residents, 905.9 months or 75.49 years because the females had a longer life on average than the males."
# Median age at entry (in months) for the filtered female rows, reported next
# to the median for all residents.
median_entry_age_F <- median(channing_female_deaths_mod$Age_at_entry, na.rm = TRUE)
# Constant format string with the value passed as an argument, so the message
# text itself is never interpreted as a sprintf() format.
sprintf("The median age at entry of Channing female residents is %s", median_entry_age_F)
## [1] "The median age at entry of Channing female residents is 930.5"
sprintf("The median of age at entry of all Channing residents is %s", entry_median)
## [1] "The median of age at entry of all Channing residents is 900.5"
# The year figures are months / 12: 930.5 / 12 is about 77.5 and
# 900.5 / 12 is about 75.04.
# NOTE(review): the figures are typed in by hand, and the stated reason (more
# males in the overall group) is not checked anywhere in this script - confirm
# against the sex counts.
sprintf("The median age for female residents, 930.5 months or 77.5 years based on 130 observations is higher than that of all residents, 900.5 months or 75.04 years because there are more males in the overall group or 462 observations")
## [1] "The median age for female residents, 930.5 months or 77.5 years based on 130 observations is higher than that of all residents, 900.5 months or 75.04 years because there are more males in the overall group or 462 observations"
# Shorten the sex label from "Female" to "F".
channing_female_deaths_mod$sex[channing_female_deaths_mod$sex == "Female"] <- "F"
# Convert Alive_YN from integer to character so it can hold letter codes.
# nchar() on its own only counts characters and leaves the column type as is.
channing_female_deaths_mod$Alive_YN <- as.character(channing_female_deaths_mod$Alive_YN)
# Check that every code is a single character wide.
nchar(channing_female_deaths_mod$Alive_YN)
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# Recode Alive_YN: every value equal to "1" becomes "N".
channing_female_deaths_mod[["Alive_YN"]][
  channing_female_deaths_mod[["Alive_YN"]] == "1"
] <- "N"
head(channing_female_deaths_mod) # confirm the recoded columns
## X sex Age_at_entry Age_at_death Length_of_Stay Alive_YN
## 98 98 F 1042 1172 130 N
## 99 99 F 921 1040 119 N
## 100 100 F 885 1003 118 N
## 101 101 F 901 1018 117 N
## 102 102 F 808 932 124 N
## 103 103 F 915 1004 89 N
# Read channing.csv directly from the GitHub repository and preview it.
channing_github <- read.csv(
  file = "https://raw.githubusercontent.com/carolc57/Summer_23_Bridge/main/channing.csv"
)
head(channing_github) # first rows of "channing.csv" as hosted on GitHub
## X sex entry exit time cens
## 1 1 Male 782 909 127 1
## 2 2 Male 1020 1128 108 1
## 3 3 Male 856 969 113 1
## 4 4 Male 915 957 42 1
## 5 5 Male 863 983 120 1
## 6 6 Male 906 1012 106 1