Objective

How to take numeric data and convert the values into different bands. One typical example is patient age: in order to create statistics, ages are often divided (binned) into age groups.

YouTube video with explanations for these examples: https://youtu.be/Vdu9KtyquCM

This page accompanies the above-mentioned video.

Create the sample data

# Sample data: 100 patients, each with a whole-number age drawn uniformly
# from 0 up to (but not including) 110 and rounded down.
Patients <- data.frame(
  AgeYears = floor(runif(100, min = 0, max = 110))
)

Create 5 year age groups

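Using the cut() function we can create 5-year age groups. Because right = FALSE, each interval includes its lower bound and excludes its upper bound, so the breaks 5 and 10 produce the group "5-9 years". Then display the AgeGroup field alongside the age.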

# Bin patient ages into 5-year age groups with cut().
# Breaks run -Inf, 5, 10, ..., 85, Inf; with right = FALSE every interval is
# closed on the left and open on the right, e.g. [5, 10) is "5-9 years".
# The first 17 labels are built as "<lower>-<lower + 4> years"; the last,
# open-ended band is "85+ years".
Patients[["AgeGroup"]] <- cut(
  Patients[["AgeYears"]],
  breaks = c(-Inf, seq(5, 85, by = 5), Inf),
  labels = c(
    paste0(seq(0, 80, by = 5), "-", seq(4, 84, by = 5), " years"),
    "85+ years"
  ),
  right = FALSE
)

# Show each age next to the group it was assigned to.
Patients
FALSE     AgeYears    AgeGroup
FALSE 1         72 70-74 years
FALSE 2         48 45-49 years
FALSE 3        101   85+ years
FALSE 4         99   85+ years
FALSE 5         91   85+ years
FALSE 6         92   85+ years
FALSE 7         58 55-59 years
FALSE 8         54 50-54 years
FALSE 9         24 20-24 years
FALSE 10        19 15-19 years
FALSE 11        25 25-29 years
FALSE 12       109   85+ years
FALSE 13        75 75-79 years
FALSE 14        45 45-49 years
FALSE 15        60 60-64 years
FALSE 16        74 70-74 years
FALSE 17        56 55-59 years
FALSE 18        89   85+ years
FALSE 19        36 35-39 years
FALSE 20        24 20-24 years
FALSE 21        54 50-54 years
FALSE 22        77 75-79 years
FALSE 23        74 70-74 years
FALSE 24        83 80-84 years
FALSE 25        69 65-69 years
FALSE 26        82 80-84 years
FALSE 27        64 60-64 years
FALSE 28       106   85+ years
FALSE 29         0   0-4 years
FALSE 30       105   85+ years
FALSE 31        83 80-84 years
FALSE 32        57 55-59 years
FALSE 33        11 10-14 years
FALSE 34       105   85+ years
FALSE 35        77 75-79 years
FALSE 36        89   85+ years
FALSE 37        98   85+ years
FALSE 38        67 65-69 years
FALSE 39        77 75-79 years
FALSE 40        67 65-69 years
FALSE 41        71 70-74 years
FALSE 42        25 25-29 years
FALSE 43       105   85+ years
FALSE 44        60 60-64 years
FALSE 45        86   85+ years
FALSE 46        52 50-54 years
FALSE 47       104   85+ years
FALSE 48         5   5-9 years
FALSE 49        46 45-49 years
FALSE 50        65 65-69 years
FALSE 51        12 10-14 years
FALSE 52        82 80-84 years
FALSE 53        91   85+ years
FALSE 54        39 35-39 years
FALSE 55        53 50-54 years
FALSE 56        30 30-34 years
FALSE 57        69 65-69 years
FALSE 58        16 15-19 years
FALSE 59        69 65-69 years
FALSE 60       102   85+ years
FALSE 61        85   85+ years
FALSE 62        14 10-14 years
FALSE 63        13 10-14 years
FALSE 64        81 80-84 years
FALSE 65        47 45-49 years
FALSE 66         3   0-4 years
FALSE 67        45 45-49 years
FALSE 68         5   5-9 years
FALSE 69        22 20-24 years
FALSE 70        60 60-64 years
FALSE 71        45 45-49 years
FALSE 72       100   85+ years
FALSE 73        70 70-74 years
FALSE 74        78 75-79 years
FALSE 75        19 15-19 years
FALSE 76        39 35-39 years
FALSE 77        74 70-74 years
FALSE 78         9   5-9 years
FALSE 79        63 60-64 years
FALSE 80        37 35-39 years
FALSE 81        60 60-64 years
FALSE 82        44 40-44 years
FALSE 83        59 55-59 years
FALSE 84        49 45-49 years
FALSE 85        40 40-44 years
FALSE 86        87   85+ years
FALSE 87        64 60-64 years
FALSE 88       106   85+ years
FALSE 89        28 25-29 years
FALSE 90        86   85+ years
FALSE 91        60 60-64 years
FALSE 92         7   5-9 years
FALSE 93        34 30-34 years
FALSE 94        38 35-39 years
FALSE 95       100   85+ years
FALSE 96        47 45-49 years
FALSE 97        26 25-29 years
FALSE 98        28 25-29 years
FALSE 99        77 75-79 years
FALSE 100       95   85+ years

Plot the data

library(ggplot2)
# Bar chart of the number of patients in each age group.
# The x-axis labels are rotated 90 degrees.
pl <- ggplot(data = Patients, aes(x = AgeGroup)) +
  geom_bar() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl

Using fill to control the fill colours

library(ggplot2)
# Same bar chart, with the fill aesthetic also mapped to AgeGroup so that
# each age group gets its own bar colour (and a legend entry).
pl <- ggplot(data = Patients, aes(x = AgeGroup, fill = AgeGroup)) +
  geom_bar() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl

Create 10 year age groups

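Using the cut() function we can create 10-year age groups. Because right = FALSE, each interval includes its lower bound and excludes its upper bound, so the breaks 10 and 20 produce the group "10-19 years". Then display the AgeGroup field alongside the age.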

# Bin patient ages into 10-year age groups with cut(), replacing the
# AgeGroup column.
# Breaks run -Inf, 10, 20, ..., 90, Inf; with right = FALSE every interval is
# closed on the left and open on the right, e.g. [10, 20) is "10-19 years".
# The first 9 labels are built as "<lower>-<lower + 9> years"; the last,
# open-ended band is "90+ years".
Patients[["AgeGroup"]] <- cut(
  Patients[["AgeYears"]],
  breaks = c(-Inf, seq(10, 90, by = 10), Inf),
  labels = c(
    paste0(seq(0, 80, by = 10), "-", seq(9, 89, by = 10), " years"),
    "90+ years"
  ),
  right = FALSE
)

# Show each age next to the group it was assigned to.
Patients
FALSE     AgeYears    AgeGroup
FALSE 1         72 70-79 years
FALSE 2         48 40-49 years
FALSE 3        101   90+ years
FALSE 4         99   90+ years
FALSE 5         91   90+ years
FALSE 6         92   90+ years
FALSE 7         58 50-59 years
FALSE 8         54 50-59 years
FALSE 9         24 20-29 years
FALSE 10        19 10-19 years
FALSE 11        25 20-29 years
FALSE 12       109   90+ years
FALSE 13        75 70-79 years
FALSE 14        45 40-49 years
FALSE 15        60 60-69 years
FALSE 16        74 70-79 years
FALSE 17        56 50-59 years
FALSE 18        89 80-89 years
FALSE 19        36 30-39 years
FALSE 20        24 20-29 years
FALSE 21        54 50-59 years
FALSE 22        77 70-79 years
FALSE 23        74 70-79 years
FALSE 24        83 80-89 years
FALSE 25        69 60-69 years
FALSE 26        82 80-89 years
FALSE 27        64 60-69 years
FALSE 28       106   90+ years
FALSE 29         0   0-9 years
FALSE 30       105   90+ years
FALSE 31        83 80-89 years
FALSE 32        57 50-59 years
FALSE 33        11 10-19 years
FALSE 34       105   90+ years
FALSE 35        77 70-79 years
FALSE 36        89 80-89 years
FALSE 37        98   90+ years
FALSE 38        67 60-69 years
FALSE 39        77 70-79 years
FALSE 40        67 60-69 years
FALSE 41        71 70-79 years
FALSE 42        25 20-29 years
FALSE 43       105   90+ years
FALSE 44        60 60-69 years
FALSE 45        86 80-89 years
FALSE 46        52 50-59 years
FALSE 47       104   90+ years
FALSE 48         5   0-9 years
FALSE 49        46 40-49 years
FALSE 50        65 60-69 years
FALSE 51        12 10-19 years
FALSE 52        82 80-89 years
FALSE 53        91   90+ years
FALSE 54        39 30-39 years
FALSE 55        53 50-59 years
FALSE 56        30 30-39 years
FALSE 57        69 60-69 years
FALSE 58        16 10-19 years
FALSE 59        69 60-69 years
FALSE 60       102   90+ years
FALSE 61        85 80-89 years
FALSE 62        14 10-19 years
FALSE 63        13 10-19 years
FALSE 64        81 80-89 years
FALSE 65        47 40-49 years
FALSE 66         3   0-9 years
FALSE 67        45 40-49 years
FALSE 68         5   0-9 years
FALSE 69        22 20-29 years
FALSE 70        60 60-69 years
FALSE 71        45 40-49 years
FALSE 72       100   90+ years
FALSE 73        70 70-79 years
FALSE 74        78 70-79 years
FALSE 75        19 10-19 years
FALSE 76        39 30-39 years
FALSE 77        74 70-79 years
FALSE 78         9   0-9 years
FALSE 79        63 60-69 years
FALSE 80        37 30-39 years
FALSE 81        60 60-69 years
FALSE 82        44 40-49 years
FALSE 83        59 50-59 years
FALSE 84        49 40-49 years
FALSE 85        40 40-49 years
FALSE 86        87 80-89 years
FALSE 87        64 60-69 years
FALSE 88       106   90+ years
FALSE 89        28 20-29 years
FALSE 90        86 80-89 years
FALSE 91        60 60-69 years
FALSE 92         7   0-9 years
FALSE 93        34 30-39 years
FALSE 94        38 30-39 years
FALSE 95       100   90+ years
FALSE 96        47 40-49 years
FALSE 97        26 20-29 years
FALSE 98        28 20-29 years
FALSE 99        77 70-79 years
FALSE 100       95   90+ years

Summarise data using the groups


# Second sample: 1000 rows, each with a whole-number age (uniform draw from
# 0 to 110, rounded down) and a whole-number frequency (uniform draw from
# 100 to 1000, rounded down).
Patients2 <- data.frame(
  AgeYears = floor(runif(1000, min = 0, max = 110)),
  Freq = floor(runif(1000, min = 100, max = 1000))
)

# Peek at the first rows.
head(Patients2)
FALSE   AgeYears Freq
FALSE 1       26  974
FALSE 2       45  302
FALSE 3       14  215
FALSE 4       25  209
FALSE 5       75  130
FALSE 6       37  164

# Create 5-year age groups
#
# Bin the ages in Patients2 into 5-year age groups with cut().
# Breaks run -Inf, 5, 10, ..., 85, Inf; with right = FALSE every interval is
# closed on the left and open on the right, e.g. [5, 10) is "5-9 years".
# The first 17 labels are built as "<lower>-<lower + 4> years"; the last,
# open-ended band is "85+ years".
Patients2[["AgeGroup"]] <- cut(
  Patients2[["AgeYears"]],
  breaks = c(-Inf, seq(5, 85, by = 5), Inf),
  labels = c(
    paste0(seq(0, 80, by = 5), "-", seq(4, 84, by = 5), " years"),
    "85+ years"
  ),
  right = FALSE
)


library(dplyr)
# Total the Freq column within each age group: one output row per AgeGroup
# level, with the group sum in Total.
PatientsGroups <- dplyr::summarise(
  dplyr::group_by(Patients2, AgeGroup),
  Total = sum(Freq)
)

# Show the per-group totals.
PatientsGroups
FALSE # A tibble: 18 x 2
FALSE    AgeGroup     Total
FALSE  * <fct>        <dbl>
FALSE  1 0-4 years    24042
FALSE  2 5-9 years    16209
FALSE  3 10-14 years  31044
FALSE  4 15-19 years  21828
FALSE  5 20-24 years  19566
FALSE  6 25-29 years  27683
FALSE  7 30-34 years  27601
FALSE  8 35-39 years  24354
FALSE  9 40-44 years  16266
FALSE 10 45-49 years  22739
FALSE 11 50-54 years  23738
FALSE 12 55-59 years  21052
FALSE 13 60-64 years  26158
FALSE 14 65-69 years  25163
FALSE 15 70-74 years  25034
FALSE 16 75-79 years  24118
FALSE 17 80-84 years  25220
FALSE 18 85+ years   130904


# Reconfirm that the grouped totals add up to the ungrouped total.
# Freq is a column of Patients2 (the data frame that was summarised);
# Patients has no Freq column, so sum(Patients$Freq) is sum(NULL), i.e. 0.
sum(Patients2$Freq)
FALSE [1] 532719
sum(PatientsGroups$Total)
FALSE [1] 532719

Plot the data

library(ggplot2)
# Plot the summarised totals: one bar per age group, with the bar height
# taken from the Total column of PatientsGroups (the per-group sums of
# Patients2$Freq). geom_col() draws the supplied y values as-is, whereas
# geom_bar() would count rows instead.
pl <- ggplot(data = PatientsGroups, aes(x = AgeGroup, y = Total))
pl <- pl + geom_col()
pl <- pl + theme_minimal()
# Rotate the x-axis labels by 90 degrees.
pl <- pl + theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl

Using fill to control the fill colours

library(ggplot2)
# Plot the summarised totals with one fill colour per age group: bar height
# comes from the Total column of PatientsGroups (the per-group sums of
# Patients2$Freq), and fill is mapped to AgeGroup. geom_col() draws the
# supplied y values as-is, whereas geom_bar() would count rows instead.
pl <- ggplot(data = PatientsGroups,
             aes(x = AgeGroup, y = Total, fill = AgeGroup))
pl <- pl + geom_col()
pl <- pl + theme_minimal()
# Rotate the x-axis labels by 90 degrees.
pl <- pl + theme(axis.text.x = element_text(angle = 90, hjust = 0))
pl

You can use the cut() function to bin any numeric data into bins or groups of any size.

YouTube video with explanations for these examples: https://youtu.be/Vdu9KtyquCM