Counts for different combinations of categorical variables
## Marital Status
## Gender M S
## F 3602 3568
## M 3264 3625
## Multidimensional tables based on three or more categorical variables
# Three-way contingency table of counts; one dimension per variable, in the
# order Gender, Marital Status, State or Province.
table1 <- table(
  Gender,
  `Marital Status`,
  `State or Province`
)

# Flatten the three-dimensional table into a single flat layout for display.
ftable(table1)
## State or Province BC CA DF Guerrero Jalisco OR Veracruz WA Yucatan Zacatecas
## Gender Marital Status
## F M 190 638 188 77 15 510 142 1166 200 476
## S 183 686 175 107 30 607 125 1134 164 357
## M M 197 692 210 94 5 514 108 1160 129 155
## S 239 717 242 105 25 631 89 1107 161 309
Distribution of the categories in a variable.
# One-way frequency table of Gender.
table2 <- table(Gender)

# Turn the counts into shares of the total, express them as percentages
# rounded to three decimals, and show the result as an interactive table.
table2 %>%
  prop.table() %>%
  as_tibble() %>%
  rename(Proportion = n) %>%
  mutate(Proportion = round(Proportion * 100, 3)) %>%
  datatable(style = "Bootstrap")
# Two-way table of Marital Status (rows) by Gender (columns).
table3 <- table(`Marital Status`, Gender)

# prop.table() is called without a margin, so every cell is divided by the
# grand total; the shares are then scaled to percentages (three decimals)
# and displayed as an interactive table.
table3 %>%
  prop.table() %>%
  as_tibble() %>%
  rename(Proportion = n) %>%
  mutate(Proportion = round(Proportion * 100, 3)) %>%
  datatable(style = "Bootstrap")
# Share of all records falling into each Gender x Marital Status x
# State or Province cell, rounded to three decimals and flattened for display.
# Reuses table1 built in an earlier chunk.
table1 %>%
  prop.table() %>%
  round(3) %>%
  ftable()
## State or Province BC CA DF Guerrero Jalisco OR Veracruz WA Yucatan Zacatecas
## Gender Marital Status
## F M 0.014 0.045 0.013 0.005 0.001 0.036 0.010 0.083 0.014 0.034
## S 0.013 0.049 0.012 0.008 0.002 0.043 0.009 0.081 0.012 0.025
## M M 0.014 0.049 0.015 0.007 0.000 0.037 0.008 0.083 0.009 0.011
## S 0.017 0.051 0.017 0.007 0.002 0.045 0.006 0.079 0.011 0.022
Marginals show the total counts or percentages across columns or rows in a contingency table:
## Marital Status
## M S
## 6866 7193
## Gender
## F M
## 7170 6889
## Gender
## Marital Status F M
## M 0.5246140 0.4753860
## S 0.4960378 0.5039622
## Gender
## Marital Status F M
## M 0.5023710 0.4737988
## S 0.4976290 0.5262012
# Convert `x` to a factor whose levels are ordered from the most frequent
# value to the least frequent one, so that plots list categories by size.
#
# x: a vector of category values.
# Returns a factor with the same values as `x` and frequency-ordered levels.
reorder_size <- function(x) {
  freq <- table(x)
  by_size <- order(freq, decreasing = TRUE)
  factor(x, levels = names(freq[by_size]))
}
# Bar chart of record counts per State or Province, with bars ordered from
# the most to the least frequent category via reorder_size().
ggplot(transac_data, aes(x = reorder_size(`State or Province`))) +
  geom_bar(fill = "#ed5107") +
  # Without an explicit label the axis title would be the raw R expression
  # used in aes(); this matches the label used by the faceted chart.
  xlab("State or Province") +
  theme_classic() +
  # Tweaks must follow theme_classic(), which resets all theme elements.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  ggtitle("Bar Charts")
## Or for proportions
# Bar chart of the share of records per State or Province (bar height is
# count / sum(count)), with bars ordered by frequency via reorder_size().
# NOTE(review): the `..count..` notation is deprecated in recent ggplot2
# releases in favour of after_stat(count); kept here so the chunk still runs
# on older installations -- confirm the minimum ggplot2 version before
# switching.
ggplot(transac_data, aes(x = reorder_size(`State or Province`))) +
  geom_bar(fill = "#02383c", aes(y = (..count..) / sum(..count..))) +
  # Without an explicit label the axis title would be the raw R expression
  # used in aes(); this matches the label used by the faceted chart.
  xlab("State or Province") +
  theme_classic() +
  scale_y_continuous(labels = scales::percent, name = "Proportion") +
  # Tweaks must follow theme_classic(), which resets all theme elements.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  ggtitle("Bar Charts")
### Multiple graphs split by category
# Proportion bar chart per State or Province, split into a grid of panels:
# one row per Marital Status and one column per Gender. Bars are ordered by
# frequency via reorder_size().
ggplot(transac_data, aes(x = reorder_size(`State or Province`))) +
  geom_bar(aes(y = (..count..) / sum(..count..)), fill = "#ed0cef") +
  xlab("State or Province") +
  scale_y_continuous(labels = scales::percent, name = "Proportion") +
  facet_grid(`Marital Status` ~ Gender) +
  # theme_classic() is a complete theme: adding it replaces every theme
  # setting made before it. It therefore has to come first, otherwise the
  # rotated x-axis labels below are silently discarded.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))