Q10a - average weekly incomes
#### Step 1: remove string, text
df <- haven::as_factor(df)
df <- df %>%
separate(Q10a, into = c("w_from", "w_to"), sep = "-", extra = "merge")
df$w_from <- gsub('and more', '', as.character(df$w_from)) # df$w_from <- str_replace(string = df$w_from, pattern = 'and more', replacement = '')
df$w_from <- as.numeric(gsub(",","",df$w_from)) # remove comma (,) and turn to numeric
df$w_to <- as.numeric(gsub(",","",df$w_to)) # remove comma (,) and turn to numeric
#### Step 2: Calculate annual income
df <- df %>%
mutate(w_from_annual = (df$w_from * 52), .after = w_to) %>% # 52 week per year
mutate(w_to_annual = (df$w_to * 52), .after = w_from_annual) # 52 week per year
# # Separate thousand - hundreds,...
# # Adding "," and numeric is converted back to character
# df$w_from_annual <- format(as.numeric(df$w_from_annual),big.mark=",")
# df$w_to_annual <- format(as.numeric(df$w_to_annual),big.mark=",")
#### Step 3: Unite columns
df$w_from_annual <- as.character(df$w_from_annual)
df$w_to_annual <- as.character(df$w_to_annual)
df <- df %>%
unite(
col = 'annual_income (w)',
c('w_from_annual', 'w_to_annual'),
sep = '-',
remove = TRUE,
na.rm = FALSE
)
#### Step 4
df$w_from <- NULL
df$w_to <- NULL
df$`annual_income (w)` <- gsub('NA-NA', '', as.character(df$`annual_income (w)`))
# df$`avg annual earnings` <- str_replace(string = df$`avg annual earnings`, pattern = 'NA-NA', replacement = '')
df$`annual_income (w)` <- str_replace(df$`annual_income (w)`, '54236-NA', '54236 and more')
# df$`avg annual earnings`[df$`avg annual earnings` == '54236-NA'] <- '54236 and more' <-- using R base
Q10b - average monthly income
#### Step 1: remove string, text
df <- haven::as_factor(df)
df <- df %>%
separate(Q10b, into = c("m_from", "m_to"), sep = "-", extra = "merge")
df$m_from <- gsub('and more', '', as.character(df$m_from))
df$m_from <- as.numeric(gsub(",","",df$m_from))
df$m_to <- as.numeric(gsub(",","",df$m_to))
#### Step 2: Calculate annual income
df <- df %>%
mutate(m_from_annual = (df$m_from * 12), .after = m_to) %>% # 12 month per year
mutate(m_to_annual = (df$m_to * 12), .after = m_from_annual) # 12 month per year
# # Separate thousand - hundreds,...
# df$m_from_annual <- format(as.numeric(df$m_from_annual),big.mark=",")
# df$m_to_annual <- format(as.numeric(df$m_to_annual),big.mark=",")
#### Step 3: Unite columns
df$m_from_annual <- as.character(df$m_from_annual)
df$m_to_annual <- as.character(df$m_to_annual)
df <- df %>%
unite(
col = 'annual_income (m)',
c('m_from_annual', 'm_to_annual'),
sep = '-',
remove = TRUE,
na.rm = FALSE
)
df$m_from <- NULL
df$m_to <- NULL
df$`annual_income (m)` <- gsub('NA-NA', '', as.character(df$`annual_income (m)`))
df$`annual_income (m)` <- str_replace(df$`annual_income (m)`, '55596-NA', '55596 and more')
str(df$Q10c)
df$Q10c <- as.character(df$Q10c)
str(df$Q10c)
df <- df %>%
unite(
col = 'annual income (£)',
c('annual_income (w)', 'annual_income (m)', 'Q10c'),
sep = ' ',
remove = TRUE,
na.rm = FALSE
)
df$`annual income (£)` <- gsub('NA','', df$`annual income (£)`)
df$`annual income (£)` <- gsub(',','', df$`annual income (£)`)
df$`annual income (£)` <- str_trim(df$`annual income (£)`, side = c("both"))
df %>%
mutate(earnings = (fct_relevel(`annual income (£)`)), .after = `annual income (£)`) %>%
tabyl(earnings)
df2 <- df %>% select(c(Q10, `annual income (£)`))
df2 %>% group_by(Q10, `annual income (£)`) %>% summarise(count = n())
## `summarise()` has grouped output by 'Q10'. You can override using the `.groups`
## argument.