Introduction


Content here …

Required packages


library(foreign) # for importing the Stata v12 dataset
library(dplyr)
library(tidyverse) # has drop_na() function
library(ggplot2)
library(scales) # percent function
library(kableExtra) # display table formatting

Import data set to use


# Load the survey data. chs12.dta is saved in Stata version 12 format, which
# read.dta() from the foreign package can import.
chs <- read.dta("~/chs12.dta")
# Size of the data frame: dims[1] is the number of rows, dims[2] the number of
# columns. The surrounding parentheses print the value as it is assigned.
(dims <- dim(chs))
## [1] 10337    35
# Preview the first 6 observations and the first 10 columns.
kbl(head(chs)[, 1:10],
    caption = "Table 1: Previewing first 6 observations and first 10 columns.") %>%
    kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Table 1: Previewing first 6 observations and first 10 columns.
id region smsa hhsize highbp sex race age agegroup height
1400 South SMSA Non-City 4 No Male White 54 50-59 174.6
1401 South SMSA Non-City 6 No Female White 41 40-49 152.3
1402 South SMSA City 6 No Female Other 21 20-29 164.1
1404 South SMSA Non-City 9 Yes Female White 63 60-69 162.6
1405 South SMSA City 3 No Female White 64 60-69 163.1
1406 South SMSA City 1 Yes Female White 63 60-69 147.1

Create a table of frequencies for the variable to be plotted.

# Frequency table for health status.
# Rows with a missing `health` value are left out of the counts, but Percent
# is expressed relative to the full row count of chs (dims[1]), so the
# percentages add up to slightly less than 100% when values are missing.
results <- chs %>%
    drop_na(health) %>%
    group_by(health) %>%
    summarise(Frequency = n()) %>%
    mutate(
        Percent = paste0(round(Frequency / dims[1] * 100, digits = 2), "%")
    )
# Render the frequency table.
results %>%
    kbl(caption = "Table 2: Frequency table for health status.") %>%
    kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Table 2: Frequency table for health status.
health Frequency Percent
Poor 729 7.05%
Fair 1670 16.16%
Average 2938 28.42%
Good 2591 25.07%
Excellent 2407 23.29%

Now plot a simple pie-chart.

# Basic pie chart: draw the counts as a single stacked bar, then wrap that
# bar around a circle so that bar height becomes slice angle.
pie <- ggplot(data = results,
              mapping = aes(x = "", y = Frequency, fill = health)) +
    geom_bar(stat = "identity", width = 1) + # one stacked bar, one segment per category
    coord_polar(theta = "y", start = 0)      # polar coordinates turn the bar into a pie
pie

Change theme and add the labels (as percentages).

# theme_void() removes the axes, the lines around the chart and the grey
# background.
# The labels show each category's count as a percentage of all rows in chs.
# position_stack(vjust = 0.5) puts every label at the midpoint of its own
# slice, following the same stacking order geom_bar() uses to draw the slices.
# Hand-computed cumulative sums do not follow that order and mislabel slices.
pie <- pie +
    theme_void() +
    geom_text(aes(label = percent(Frequency / dims[1])),
              position = position_stack(vjust = 0.5),
              color = "black", size = 4)
pie

Assigning fill scale

# Colour the slices with the default ColorBrewer palette of scale_fill_brewer().
pie +
    scale_fill_brewer()

Assigning colours manually.

# One hand-picked colour per slice, supplied to the manual fill scale.
slice_colors <- c(
    "magenta",
    "pink",
    "grey",
    "skyblue",
    "orange"
)
pie +
    scale_fill_manual(values = slice_colors)

Changing the title of the legend

# Give the fill legend a descriptive title.
pie +
    labs(fill = "Health status")

Changing the position of the legend

# Move the legend underneath the chart and blank out its title.
pie +
    theme(legend.position = "bottom") +
    labs(fill = "")

Change font-size of legend title and text.

# Title the legend and set both its title and its entries to size-12 text.
pie +
    theme(
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12)
    ) +
    labs(fill = "Health status")

Changing the pie-chart to a doughnut.

A doughnut chart is simply a pie chart with a hole in the middle. To create the hole, we set x to a numeric value (x = 2) and widen the x limits with xlim(0.5, 2.5), which leaves empty space between the centre of the circle and the bar. Additionally, we remove the width argument from geom_bar(). The complete syntax is shown below.

# Doughnut chart: the same stacked bar as the pie chart, but drawn at x = 2
# with the x axis extended down to 0.5. The unused range below the bar becomes
# the hole once the plot is wrapped into polar coordinates.
pie <- ggplot(results, aes(x = 2, y = Frequency, fill = health)) +
    geom_bar(stat = "identity") +
    coord_polar(theta = "y", start = 0) +
    theme_void() + # removes the lines around the chart and the grey background
    # The labels show each category's count as a percentage of all rows in chs.
    # position_stack(vjust = 0.5) centres every label within its own segment,
    # in the same stacking order geom_bar() uses to draw the segments.
    geom_text(aes(label = percent(Frequency / dims[1])),
              position = position_stack(vjust = 0.5),
              color = "black", size = 4) +
    theme(legend.title = element_text(size = 12),
          legend.text = element_text(size = 12)) +
    labs(fill = "Health status") +
    xlim(0.5, 2.5) # leaves room inside the ring for the hole
pie


STEM Research
https://stemresearchs.com