How to take numeric data and convert the values into bands. A typical example is patient age: in order to produce statistics, ages are often divided (binned) into age groups.
YouTube video explaining these examples: https://youtu.be/Vdu9KtyquCM
This page accompanies the above-mentioned video.
# Simulate 100 patients whose ages are whole numbers drawn uniformly
# from 0 to 109; the single column is named AgeYears.
Patients <- data.frame(
  AgeYears = floor(runif(100, min = 0, max = 110))
)
Using the cut() function we can create 5-year age groups. Display the AgeGroup field alongside the age.
# Bin patient ages into 5-year age groups with cut().
# right = FALSE makes each interval closed on the left, [lower, upper),
# so an age of exactly 5 falls in "5-9 years", not "0-4 years".
# The outer breaks are -Inf and Inf, so every age is assigned a group.
Patients$AgeGroup <- cut(
  Patients$AgeYears,
  breaks = c(-Inf, seq(5, 85, by = 5), Inf),
  labels = c(
    "0-4 years",
    "5-9 years",
    "10-14 years",
    "15-19 years",
    "20-24 years",
    "25-29 years",
    "30-34 years",
    "35-39 years",
    "40-44 years",
    "45-49 years",
    "50-54 years",
    "55-59 years",
    "60-64 years",
    "65-69 years",
    "70-74 years",
    "75-79 years",
    "80-84 years",
    "85+ years"
  ),
  right = FALSE
)
# Print every row so each age can be checked against its group.
Patients
FALSE AgeYears AgeGroup
FALSE 1 72 70-74 years
FALSE 2 48 45-49 years
FALSE 3 101 85+ years
FALSE 4 99 85+ years
FALSE 5 91 85+ years
FALSE 6 92 85+ years
FALSE 7 58 55-59 years
FALSE 8 54 50-54 years
FALSE 9 24 20-24 years
FALSE 10 19 15-19 years
FALSE 11 25 25-29 years
FALSE 12 109 85+ years
FALSE 13 75 75-79 years
FALSE 14 45 45-49 years
FALSE 15 60 60-64 years
FALSE 16 74 70-74 years
FALSE 17 56 55-59 years
FALSE 18 89 85+ years
FALSE 19 36 35-39 years
FALSE 20 24 20-24 years
FALSE 21 54 50-54 years
FALSE 22 77 75-79 years
FALSE 23 74 70-74 years
FALSE 24 83 80-84 years
FALSE 25 69 65-69 years
FALSE 26 82 80-84 years
FALSE 27 64 60-64 years
FALSE 28 106 85+ years
FALSE 29 0 0-4 years
FALSE 30 105 85+ years
FALSE 31 83 80-84 years
FALSE 32 57 55-59 years
FALSE 33 11 10-14 years
FALSE 34 105 85+ years
FALSE 35 77 75-79 years
FALSE 36 89 85+ years
FALSE 37 98 85+ years
FALSE 38 67 65-69 years
FALSE 39 77 75-79 years
FALSE 40 67 65-69 years
FALSE 41 71 70-74 years
FALSE 42 25 25-29 years
FALSE 43 105 85+ years
FALSE 44 60 60-64 years
FALSE 45 86 85+ years
FALSE 46 52 50-54 years
FALSE 47 104 85+ years
FALSE 48 5 5-9 years
FALSE 49 46 45-49 years
FALSE 50 65 65-69 years
FALSE 51 12 10-14 years
FALSE 52 82 80-84 years
FALSE 53 91 85+ years
FALSE 54 39 35-39 years
FALSE 55 53 50-54 years
FALSE 56 30 30-34 years
FALSE 57 69 65-69 years
FALSE 58 16 15-19 years
FALSE 59 69 65-69 years
FALSE 60 102 85+ years
FALSE 61 85 85+ years
FALSE 62 14 10-14 years
FALSE 63 13 10-14 years
FALSE 64 81 80-84 years
FALSE 65 47 45-49 years
FALSE 66 3 0-4 years
FALSE 67 45 45-49 years
FALSE 68 5 5-9 years
FALSE 69 22 20-24 years
FALSE 70 60 60-64 years
FALSE 71 45 45-49 years
FALSE 72 100 85+ years
FALSE 73 70 70-74 years
FALSE 74 78 75-79 years
FALSE 75 19 15-19 years
FALSE 76 39 35-39 years
FALSE 77 74 70-74 years
FALSE 78 9 5-9 years
FALSE 79 63 60-64 years
FALSE 80 37 35-39 years
FALSE 81 60 60-64 years
FALSE 82 44 40-44 years
FALSE 83 59 55-59 years
FALSE 84 49 45-49 years
FALSE 85 40 40-44 years
FALSE 86 87 85+ years
FALSE 87 64 60-64 years
FALSE 88 106 85+ years
FALSE 89 28 25-29 years
FALSE 90 86 85+ years
FALSE 91 60 60-64 years
FALSE 92 7 5-9 years
FALSE 93 34 30-34 years
FALSE 94 38 35-39 years
FALSE 95 100 85+ years
FALSE 96 47 45-49 years
FALSE 97 26 25-29 years
FALSE 98 28 25-29 years
FALSE 99 77 75-79 years
FALSE 100 95 85+ years
library(ggplot2)
# Bar chart of the number of patients in each age group.
# The x-axis labels are rotated 90 degrees so they do not overlap.
pl <- ggplot(data = Patients, aes(x = AgeGroup)) +
  geom_bar() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl
library(ggplot2)
# Same bar chart of patients per age group, with each bar filled by its
# age group so every group gets its own colour and a legend entry.
pl <- ggplot(data = Patients, aes(x = AgeGroup, fill = AgeGroup)) +
  geom_bar() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl
Using the cut() function we can create 10-year age groups. Display the AgeGroup field alongside the age.
# Re-bin patient ages into 10-year age groups with cut(), overwriting the
# AgeGroup column.
# right = FALSE makes each interval closed on the left, [lower, upper),
# so an age of exactly 10 falls in "10-19 years", not "0-9 years".
Patients$AgeGroup <- cut(
  Patients$AgeYears,
  breaks = c(-Inf, seq(10, 90, by = 10), Inf),
  labels = c(
    "0-9 years",
    "10-19 years",
    "20-29 years",
    "30-39 years",
    "40-49 years",
    "50-59 years",
    "60-69 years",
    "70-79 years",
    "80-89 years",
    "90+ years"
  ),
  right = FALSE
)
# Print every row so each age can be checked against its group.
Patients
FALSE AgeYears AgeGroup
FALSE 1 72 70-79 years
FALSE 2 48 40-49 years
FALSE 3 101 90+ years
FALSE 4 99 90+ years
FALSE 5 91 90+ years
FALSE 6 92 90+ years
FALSE 7 58 50-59 years
FALSE 8 54 50-59 years
FALSE 9 24 20-29 years
FALSE 10 19 10-19 years
FALSE 11 25 20-29 years
FALSE 12 109 90+ years
FALSE 13 75 70-79 years
FALSE 14 45 40-49 years
FALSE 15 60 60-69 years
FALSE 16 74 70-79 years
FALSE 17 56 50-59 years
FALSE 18 89 80-89 years
FALSE 19 36 30-39 years
FALSE 20 24 20-29 years
FALSE 21 54 50-59 years
FALSE 22 77 70-79 years
FALSE 23 74 70-79 years
FALSE 24 83 80-89 years
FALSE 25 69 60-69 years
FALSE 26 82 80-89 years
FALSE 27 64 60-69 years
FALSE 28 106 90+ years
FALSE 29 0 0-9 years
FALSE 30 105 90+ years
FALSE 31 83 80-89 years
FALSE 32 57 50-59 years
FALSE 33 11 10-19 years
FALSE 34 105 90+ years
FALSE 35 77 70-79 years
FALSE 36 89 80-89 years
FALSE 37 98 90+ years
FALSE 38 67 60-69 years
FALSE 39 77 70-79 years
FALSE 40 67 60-69 years
FALSE 41 71 70-79 years
FALSE 42 25 20-29 years
FALSE 43 105 90+ years
FALSE 44 60 60-69 years
FALSE 45 86 80-89 years
FALSE 46 52 50-59 years
FALSE 47 104 90+ years
FALSE 48 5 0-9 years
FALSE 49 46 40-49 years
FALSE 50 65 60-69 years
FALSE 51 12 10-19 years
FALSE 52 82 80-89 years
FALSE 53 91 90+ years
FALSE 54 39 30-39 years
FALSE 55 53 50-59 years
FALSE 56 30 30-39 years
FALSE 57 69 60-69 years
FALSE 58 16 10-19 years
FALSE 59 69 60-69 years
FALSE 60 102 90+ years
FALSE 61 85 80-89 years
FALSE 62 14 10-19 years
FALSE 63 13 10-19 years
FALSE 64 81 80-89 years
FALSE 65 47 40-49 years
FALSE 66 3 0-9 years
FALSE 67 45 40-49 years
FALSE 68 5 0-9 years
FALSE 69 22 20-29 years
FALSE 70 60 60-69 years
FALSE 71 45 40-49 years
FALSE 72 100 90+ years
FALSE 73 70 70-79 years
FALSE 74 78 70-79 years
FALSE 75 19 10-19 years
FALSE 76 39 30-39 years
FALSE 77 74 70-79 years
FALSE 78 9 0-9 years
FALSE 79 63 60-69 years
FALSE 80 37 30-39 years
FALSE 81 60 60-69 years
FALSE 82 44 40-49 years
FALSE 83 59 50-59 years
FALSE 84 49 40-49 years
FALSE 85 40 40-49 years
FALSE 86 87 80-89 years
FALSE 87 64 60-69 years
FALSE 88 106 90+ years
FALSE 89 28 20-29 years
FALSE 90 86 80-89 years
FALSE 91 60 60-69 years
FALSE 92 7 0-9 years
FALSE 93 34 30-39 years
FALSE 94 38 30-39 years
FALSE 95 100 90+ years
FALSE 96 47 40-49 years
FALSE 97 26 20-29 years
FALSE 98 28 20-29 years
FALSE 99 77 70-79 years
FALSE 100 95 90+ years
# Simulate a second data set of 1000 rows: a whole-number age (0 to 109)
# and a whole-number frequency (100 to 999) for each row.
Patients2 <- data.frame(
  AgeYears = floor(runif(1000, min = 0, max = 110)),
  Freq = floor(runif(1000, min = 100, max = 1000))
)
# Show the first rows as a quick sanity check.
head(Patients2)
FALSE AgeYears Freq
FALSE 1 26 974
FALSE 2 45 302
FALSE 3 14 215
FALSE 4 25 209
FALSE 5 75 130
FALSE 6 37 164
# Bin the Patients2 ages into 5-year age groups with cut().
# right = FALSE makes each interval closed on the left, [lower, upper),
# so an age of exactly 5 falls in "5-9 years", not "0-4 years".
# The outer breaks are -Inf and Inf, so every age is assigned a group.
Patients2$AgeGroup <- cut(
  Patients2$AgeYears,
  breaks = c(-Inf, seq(5, 85, by = 5), Inf),
  labels = c(
    "0-4 years",
    "5-9 years",
    "10-14 years",
    "15-19 years",
    "20-24 years",
    "25-29 years",
    "30-34 years",
    "35-39 years",
    "40-44 years",
    "45-49 years",
    "50-54 years",
    "55-59 years",
    "60-64 years",
    "65-69 years",
    "70-74 years",
    "75-79 years",
    "80-84 years",
    "85+ years"
  ),
  right = FALSE
)
library(dplyr)
# Sum Freq within each age group: one row per AgeGroup, with the sum in
# a column named Total.
PatientsGroups <- dplyr::summarise(
  dplyr::group_by(Patients2, AgeGroup),
  Total = sum(Freq)
)
PatientsGroups
FALSE # A tibble: 18 x 2
FALSE AgeGroup Total
FALSE * <fct> <dbl>
FALSE 1 0-4 years 24042
FALSE 2 5-9 years 16209
FALSE 3 10-14 years 31044
FALSE 4 15-19 years 21828
FALSE 5 20-24 years 19566
FALSE 6 25-29 years 27683
FALSE 7 30-34 years 27601
FALSE 8 35-39 years 24354
FALSE 9 40-44 years 16266
FALSE 10 45-49 years 22739
FALSE 11 50-54 years 23738
FALSE 12 55-59 years 21052
FALSE 13 60-64 years 26158
FALSE 14 65-69 years 25163
FALSE 15 70-74 years 25034
FALSE 16 75-79 years 24118
FALSE 17 80-84 years 25220
FALSE 18 85+ years 130904
# Reconfirm that the grouped totals add up to the ungrouped total.
# Freq is a column of Patients2. Patients has no Freq column, so
# Patients$Freq is NULL and sum(NULL) is 0, which hides the mismatch.
sum(Patients2$Freq)
FALSE [1] 532719
sum(PatientsGroups$Total)
FALSE [1] 532719
library(ggplot2)
# Plot the summed Freq for each 5-year age group held in PatientsGroups.
# geom_col() takes the bar heights from the Total column, whereas
# geom_bar() would count rows, which is one per group in PatientsGroups.
# The x-axis labels are rotated 90 degrees so they do not overlap.
pl <- ggplot(data = PatientsGroups, aes(x = AgeGroup, y = Total)) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl
library(ggplot2)
# Plot the summed Freq for each 5-year age group held in PatientsGroups,
# filling each bar by its age group.
# geom_col() takes the bar heights from the Total column, whereas
# geom_bar() would count rows, which is one per group in PatientsGroups.
pl <- ggplot(
  data = PatientsGroups,
  aes(x = AgeGroup, y = Total, fill = AgeGroup)
) +
  geom_col() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl
You can use the cut() function to bin any numeric data into bins or groups of any size.
YouTube video explaining these examples: https://youtu.be/Vdu9KtyquCM