Objective

  1. Reshape the data from wide to long (and back to wide) to create summary tables for Likert data visualisation;
  2. Plot the Likert data with plot_likert() (sjPlot package) and likert() (HH package);

Library

pacman::p_load(
  here,       # relative file pathways  
  haven,      # reading sav file
  sjlabelled,
  labelled,
  dplyr,      # data cleaning 
  rio,        # importing data  
  janitor,    # data cleaning and tables
  tidyverse,  # data management and visualization
  sjPlot,
  sjmisc,
  ggrepel,
  HH
)

Data import

Replace NA values in factor columns with an explicit level

library(forcats)

# Convert every MM* and AA* column to a factor and, in the same pass, turn
# missing values into an explicit "Not applicable" factor level.
plot_dt <- plot_dt %>%
  mutate(
    across(
      starts_with(c("MM", "AA")),
      \(x) fct_na_value_to_level(as_factor(x), "Not applicable")
    )
  )

Plot with original likert data

# Extract the two item batteries as data frames by searching for "AA" and
# "MM" with find_var(), then draw a Likert plot for each subset.
# NOTE(review): find_var() matches the pattern anywhere, and by default
# presumably also in variable labels - confirm only the intended items match.
mydt <- plot_dt %>% find_var(pattern = "AA", out = "df")
plot_likert(mydt)

mydf <- plot_dt %>% find_var(pattern = "MM", out = "df")
plot_likert(mydf)

# MM items again, with percentage signs and the summed values placed outside
# the bars; grid range of 1.2 on both sides.
plot_likert(
  mydf,
  values = "sum.outside",
  show.prc.sign = TRUE,
  expand.grid = FALSE,
  grid.range = c(1.2, 1.2)
)

# Same plot with the grid range narrowed to 1 on both sides.
plot_likert(
  mydf,
  values = "sum.outside",
  show.prc.sign = TRUE,
  expand.grid = FALSE,
  grid.range = c(1, 1)
)

# NOTE(review): this call is identical to the previous one - confirm whether
# a different setting was intended here.
plot_likert(
  mydf,
  values = "sum.outside",
  show.prc.sign = TRUE,
  expand.grid = FALSE,
  grid.range = c(1, 1)
)

Plot with summary table - Using HH package

Creating Summary percentage table

# Summary table for the MM_ questions: one row per question and one column
# per response level, each cell holding the share of that question's
# responses that fall on the level.
mydf_1 <- mydf %>%
  pivot_longer(
    cols = starts_with("MM"),
    values_drop_na = TRUE,      # drop NA
    values_to = "value"         # name of the new column
  ) %>%
  group_by(name) %>%
  summarise(
    # The first column must be "Strongly disagree". It used to be labelled
    # "Strongly agree", which the later column of the same name overwrote,
    # so the strongly-disagree share was missing from the table.
    `Strongly disagree` = sum(value == "Strongly disagree") / n(),
    `Somewhat disagree` = sum(value == "Somewhat disagree") / n(),
    `Neither agree nor disagree` = sum(value == "Neither agree nor disagree") / n(),
    `Somewhat agree` = sum(value == "Somewhat agree") / n(),
    `Strongly agree` = sum(value == "Strongly agree") / n(),
    `Not applicable` = sum(value == "Not applicable") / n()
  )


# Summary table for the AA_ questions: one row per question and one column
# per response level, each cell holding the share of that question's
# responses that fall on the level.
mydt_1 <- mydt %>%
  pivot_longer(
    cols = starts_with("AA"),
    values_to = "value",        # name of the new column
    values_drop_na = TRUE       # drop NA
  ) %>%
  group_by(name) %>%
  summarise(
    `All of the time` = sum(value == "All of the time") / n(),
    `Most of the time` = sum(value == "Most of the time") / n(),
    `Some of the time` = sum(value == "Some of the time") / n(),
    `Rarely` = sum(value == "Rarely") / n(),
    `Not at all` = sum(value == "Not at all") / n(),
    `Not applicable` = sum(value == "Not applicable") / n()
  )

# Round every numeric (proportion) column to two decimal places.
mydf_1 <- mutate(mydf_1, across(where(is.numeric), \(x) round(x, 2)))
mydt_1 <- mutate(mydt_1, across(where(is.numeric), \(x) round(x, 2)))

Plot the data percentage

# Diverging stacked bar chart of the MM proportion table, one bar per question.
likert(
  name ~ .,
  mydf_1,
  # ReferenceZero = 3 would centre the zero reference line on the 3rd level
  # (e.g. the neutral one) instead of the default middle of the scale; it
  # does not remove that level.
  ylab = "Question",
  main = list("shopping data", x = unit(.62, "npc")),
  auto.key = list(columns = 2),
  reverse.rows = TRUE           # spelled out: `T` can be reassigned
)

# Same chart, with the questions additionally sorted via positive.order.
likert(
  name ~ .,
  mydf_1,
  # ReferenceZero = 3 would centre the zero reference line on the 3rd level
  # (e.g. the neutral one) instead of the default middle of the scale; it
  # does not remove that level.
  ylab = "Question",
  main = list("shopping data", x = unit(.62, "npc")),
  auto.key = list(columns = 2),
  reverse.rows = TRUE,
  positive.order = TRUE
)

Creating Summary count table

# Count table for the MM_ questions: number of responses per question and
# response level, first in long form and then one column per level.
mydf_2 <- mydf %>%
  pivot_longer(
    cols = starts_with("MM"),
    values_drop_na = TRUE,      # drop NA
    values_to = "value"         # name of the new column
  ) %>%
  group_by(name, value) %>%     # count
  # .groups = "drop" returns an ungrouped tibble, so nothing downstream
  # inherits a leftover grouping by `name`.
  summarise(count = n(), .groups = "drop")

mydf_2_wide <-                  # convert long to wide data frame
  mydf_2 %>%
  pivot_wider(
    id_cols = name,
    names_from = value,
    values_from = count,
    values_fill = 0L            # a level nobody chose counts as 0, not NA
  )

mydf_2_wide
## # A tibble: 7 × 7
##   name  `Not applicable` `Strongly disagree` `Somewhat disagree`
##   <chr>            <int>               <int>               <int>
## 1 MM10               147                 102                 122
## 2 MM4                 49                 196                 252
## 3 MM5                 46                 108                 188
## 4 MM6                117                  77                  24
## 5 MM7                172                 145                 113
## 6 MM8                 82                 544                 129
## 7 MM9                134                  80                 113
## # ℹ 3 more variables: `Neither agree nor disagree` <int>,
## #   `Somewhat agree` <int>, `Strongly agree` <int>
# Count table for the AA_ questions: number of responses per question and
# response level, first in long form and then one column per level.
mydt_2 <- mydt %>%
  pivot_longer(
    cols = starts_with("AA"),
    values_drop_na = TRUE,      # drop NA
    values_to = "value"         # name of the new column
  ) %>%
  group_by(name, value) %>%
  # .groups = "drop" returns an ungrouped tibble, so nothing downstream
  # inherits a leftover grouping by `name`.
  summarise(count = n(), .groups = "drop")

mydt_2_wide <-                  # convert long to wide data frame
  mydt_2 %>%
  pivot_wider(
    id_cols = name,
    names_from = value,
    values_from = count,
    values_fill = 0L            # a level nobody chose counts as 0, not NA
  )

mydt_2_wide
## # A tibble: 3 × 7
##   name  `Not applicable` `All of the time` `Most of the time` `Some of the time`
##   <chr>            <int>             <int>              <int>              <int>
## 1 AA1                 76               411                245                180
## 2 AA2                 97               630                230                 77
## 3 AA3                125               135                 72                153
## # ℹ 2 more variables: Rarely <int>, `Not at all` <int>

Plot the data count

# MM count table drawn as percentages (as.percent = TRUE), questions sorted
# via positive.order.
likert(
  name ~ .,
  data = mydf_2_wide,
  ylab = NULL,
  as.percent = TRUE,
  positive.order = TRUE,
  main = list("Shopping_MM", x = unit(.55, "npc")),
  sub = list("Rating", x = unit(.57, "npc")),
  xlim = c(-100, -20, 0, 20, 40, 60, 80, 100),
  strip = FALSE,
  par.strip.text = list(cex = .7)
)

# Same plot with the legend text organised into 2 columns.
likert(
  name ~ .,
  data = mydf_2_wide,
  ylab = NULL,
  as.percent = TRUE,
  positive.order = TRUE,
  main = list("Shopping_MM", x = unit(.55, "npc")),
  sub = list("Rating", x = unit(.57, "npc")),
  xlim = c(-100, -20, 0, 20, 40, 60, 80, 100),
  strip = FALSE,
  par.strip.text = list(cex = .7),
  auto.key = list(columns = 2)
)

# AA count table, same settings as the first MM plot.
likert(
  name ~ .,
  data = mydt_2_wide,
  ylab = NULL,
  as.percent = TRUE,
  positive.order = TRUE,
  main = list("Shopping_AA", x = unit(.55, "npc")),
  sub = list("Rating", x = unit(.57, "npc")),
  xlim = c(-100, -20, 0, 20, 40, 60, 80, 100),
  strip = FALSE,
  par.strip.text = list(cex = .7)
)

# Panel label for each MM question, listed in the row order of mydf_2_wide
# (MM10, MM4, MM5, MM6, MM7, MM8, MM9).
type <- c("Superdrug", "Superdrug", "Boots", "Boots", "Superdrug", "Superdrug", "Boots")

# Fail early if the labels no longer line up with the rows of the table.
stopifnot(length(type) == nrow(mydf_2_wide))

# Add the label as a properly named column. cbind() on the grouped tibble
# produced an auto-repaired column `...8` that then had to be renamed by
# position; converting to a plain data frame first avoids both steps.
new_dt <- as.data.frame(mydf_2_wide)
new_dt$type <- type

# MM counts split into one panel per `type`, stacked vertically
# (layout = c(1, 2)), each panel with its own y scale.
likert(
  name ~ . | type,
  new_dt,
  main = list("Shopping_MM", x = unit(.6, "npc")),
  sub = list("Satisfaction Rating", x = unit(.57, "npc")),
  layout = c(1, 2),
  auto.key = list(columns = 2, reverse.rows = TRUE),  # `T` can be reassigned
  scales = list(y = list(relation = "free")),
  between = list(y = 1),
  strip.left = TRUE,
  strip = FALSE,
  par.strip.text = list(cex = 1.1, lines = 2),
  ylab = "Ranking",
  cex = 1.2,
  as.percent = FALSE,         # plot count data, not percentage
  positive.order = TRUE,
  # max count value in the data is 829, so the limits must reach beyond 800
  xlim = c(-900, -40, -20, 0, 20, 40, 60, 80, 1000),
  resize.height.tuning = 1
)

References

  1. Creating Frequency Table from Likert Scale data in R;
  2. mutate() & replace_na() for numeric class;
  3. mutate() & replace_na() for factor levels/ factor class;
  4. On Likert Scales In R
  5. Likert scale