Load Libraries
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
Import data
# Read the zipped adult CSV; empty strings, "NA" and "?" are all parsed as
# missing values.
df <- read_csv(
  file = "adult.csv.zip",
  na = c("", "NA", "?")
)
Convert categorical variables
# Columns that are turned into factors, listed once instead of one
# assignment per column.
# NOTE(review): `age` is converted as well, although it looks like a numeric
# variable -- confirm that treating it as a factor is intended.
categorical_cols <- c(
  "age", "workclass", "education", "marital.status", "occupation",
  "relationship", "race", "sex", "native.country", "income"
)
df[categorical_cols] <- lapply(df[categorical_cols], as.factor)
Remove duplicate variables
# Drop education.num, which the section heading treats as a duplicate
# (presumably of `education` -- confirm).
df <- select(df, -education.num)
VARIATION
Visualising the distribution of a categorical variable: use a bar chart
How you visualise the distribution of a variable will depend on whether the variable is categorical or continuous. To examine the distribution of a categorical variable, use a bar chart:


Visualising the distribution of a continuous variable, use a histogram:
# Histogram of hours.per.week in 5-hour bins. Each bar is filled according to
# its own count on a green-to-red gradient and outlined in red.
# after_stat(count) replaces the deprecated `..count..` notation, which emits
# a deprecation warning from ggplot2 3.4.0 onwards.
ggplot(data = df, mapping = aes(x = hours.per.week)) +
  geom_histogram(
    mapping = aes(fill = after_stat(count)),
    binwidth = 5,
    colour = "red"
  ) +
  scale_fill_gradient("Count", low = "green", high = "red")

To make it easy to see the unusual values, we need to zoom in to small values of the y-axis with coord_cartesian():
# Zoomed view of the hours.per.week histogram: x restricted to 90-100 and
# y to 0-100 so that rare, very high values become visible.
# binwidth = 5 matches the 5-hour bins of the full histogram and avoids the
# "`stat_bin()` using `bins = 30`" message that an unspecified bin width
# triggers.
# coord_cartesian() only changes the visible window; the bins are still
# computed from all rows.
ggplot(data = df, mapping = aes(x = hours.per.week)) +
  geom_histogram(binwidth = 5) +
  coord_cartesian(xlim = c(90, 100), ylim = c(0, 100))

COVARIATION
Variation describes the behavior within a variable, covariation describes the behavior between variables.
A categorical and continuous variable
A common way to display the distribution of a continuous variable broken down by a categorical variable is the boxplot.

Two categorical variables
To visualise the covariation between categorical variables, you’ll need to count the number of observations for each combination. One way to do that is to rely on the built-in geom_count():
# Count plot of education against race: one point per combination, sized by
# the number of observations. The axes are flipped, and the x-axis text
# element is rotated by 90 degrees.
ggplot(df, aes(x = education, y = race)) +
  geom_count() +
  coord_flip() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

Then visualise with geom_tile() and the fill aesthetic:
# Tile plot of education against race: count() tallies each combination into
# column `n`, which drives the tile fill. The axes are flipped, and the x-axis
# text element is rotated by 90 degrees.
ggplot(
  data = count(df, education, race),
  mapping = aes(x = education, y = race)
) +
  geom_tile(aes(fill = n)) +
  coord_flip() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

LS0tDQp0aXRsZTogIkV4cGxvcmF0b3J5IERhdGEgQW5hbHlzaXMiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCiMjI0xvYWQgTGlicmFyaWVzDQpgYGB7ciwgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXIpDQpsaWJyYXJ5KHJlYWRyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KYGBgDQojIyNJbXBvcnQgZGF0YQ0KYGBge3IsIGVjaG89VFJVRSwgbWVzc2FnZT1UUlVFLCB3YXJuaW5nPVRSVUV9DQpkZiA8LSByZWFkX2NzdignYWR1bHQuY3N2LnppcCcsIG5hID0gYygiIiwgIk5BIiwgIj8iKSkNCmBgYA0KIyMjRGVzY3JpYmUgZmlsZSBmb3JtYXQNCmBgYHtyLCBlY2hvPVRSVUV9DQpzdHIoZGYsIG1heC5sZXZlbCA9IDEpDQpgYGANCiMjI0NvbnZlcnQgY2F0ZWdvcmljYWwgdmFyaWFibGVzDQpgYGB7ciwgZWNobz1UUlVFfQ0KZGYkYWdlIDwtIGFzLmZhY3RvcihkZiRhZ2UpDQpkZiR3b3JrY2xhc3MgPC0gYXMuZmFjdG9yKGRmJHdvcmtjbGFzcykNCmRmJGVkdWNhdGlvbiA8LSBhcy5mYWN0b3IoZGYkZWR1Y2F0aW9uKQ0KZGYkbWFyaXRhbC5zdGF0dXMgPC0gYXMuZmFjdG9yKGRmJG1hcml0YWwuc3RhdHVzKQ0KZGYkb2NjdXBhdGlvbiA8LSBhcy5mYWN0b3IoZGYkb2NjdXBhdGlvbikNCmRmJHJlbGF0aW9uc2hpcCA8LSBhcy5mYWN0b3IoZGYkcmVsYXRpb25zaGlwKQ0KZGYkcmFjZSA8LSBhcy5mYWN0b3IoZGYkcmFjZSkNCmRmJHNleCA8LSBhcy5mYWN0b3IoZGYkc2V4KQ0KZGYkbmF0aXZlLmNvdW50cnkgPC0gYXMuZmFjdG9yKGRmJG5hdGl2ZS5jb3VudHJ5KQ0KZGYkaW5jb21lIDwtIGFzLmZhY3RvcihkZiRpbmNvbWUpDQpgYGANCiMjI1JlbW92ZSBkdXBsaWNhdGUgdmFyaWFibGVzDQpgYGB7ciwgZWNobz1UUlVFfQ0KZGYgPC0gZGYgJT4lDQogIHNlbGVjdCgtZWR1Y2F0aW9uLm51bSkNCmBgYA0KIyNWQVJJQVRJT04gDQojIyNWaXN1YWxpc2luZyB0aGUgZGlzdHJpYnV0aW9uIG9mIGEgY2F0ZWdvcmljYWwgdmFyaWFibGUsIHVzZSBCYXItQ2hhcnQNCkhvdyB5b3UgdmlzdWFsaXNlIHRoZSBkaXN0cmlidXRpb24gb2YgYSB2YXJpYWJsZSB3aWxsIGRlcGVuZCBvbiB3aGV0aGVyIHRoZSB2YXJpYWJsZSBpcyBjYXRlZ29yaWNhbCBvciBjb250aW51b3VzLiBUbyBleGFtaW5lIHRoZSBkaXN0cmlidXRpb24gb2YgYSBjYXRlZ29yaWNhbCB2YXJpYWJsZSwgdXNlIGEgYmFyIGNoYXJ0Og0KYGBge3IsIGVjaG89RkFMU0V9DQpnZ3Bsb3QoZGYsIGFlcyh4ID1pbmNvbWUpKSArDQogIGdlb21fYmFyKGFlcyh5ID0gKC4uY291bnQuLikvc3VtKC4uY291bnQuLikpKSArDQogIGdlb21fdGV4dChhZXMoeSA9ICgoLi5jb3VudC4uKS9zdW0oLi5jb3VudC4uKSksIGxhYmVsID0gc2NhbGVzOjpwZXJjZW50KCguLmNvdW50Li4pL3N1bSguLmNvdW50Li4pKSksIHN0YXQgPSAiY291bnQiLCB2anVzdCA9IC0w
LjI1KSArDQogIGxhYnModGl0bGUgPSAiSW5jb21lIERpc3RyaWJ1dGlvbiIsIHkgPSAiUGVyY2VudCIsIHggPSAiSW5jb21lIikNCmBgYA0KYGBge3IsIGVjaG89RkFMU0V9DQogIGdncGxvdChkZiwgYWVzKHg9cmVvcmRlcihlZHVjYXRpb24sIC10YWJsZShlZHVjYXRpb24pW2VkdWNhdGlvbl0pKSkrDQogIGdlb21fYmFyKGFlcyh5ID0gKC4uY291bnQuLikvc3VtKC4uY291bnQuLikpKSArDQogIGdlb21fdGV4dChhZXMoeSA9ICgoLi5jb3VudC4uKS9zdW0oLi5jb3VudC4uKSksIGxhYmVsID0gc2NhbGVzOjpwZXJjZW50KCguLmNvdW50Li4pL3N1bSguLmNvdW50Li4pKSksIHN0YXQgPSAiY291bnQiLCB2anVzdCA9IC0wLjI1KSArDQogIGxhYnModGl0bGUgPSAiRWR1Y2F0aW9uIERpc3RyaWJ1dGlvbiIsIHkgPSAiUGVyY2VudCIsIHggPSAiRWR1Y2F0aW9uIikgKw0KICB0aGVtZShheGlzLnRleHQueD1lbGVtZW50X3RleHQoYW5nbGU9OTAsaGp1c3Q9MSx2anVzdD0wLjUpKQ0KYGBgDQojIyNWaXN1YWxpc2luZyB0aGUgZGlzdHJpYnV0aW9uIG9mIGEgY29udGludW91cyB2YXJpYWJsZSwgdXNlIGEgaGlzdG9ncmFtOg0KYGBge3J9DQpnZ3Bsb3QoZGF0YSA9IGRmKSArDQogIGdlb21faGlzdG9ncmFtKG1hcHBpbmcgPSBhZXMoeCA9IGhvdXJzLnBlci53ZWVrLCBmaWxsPS4uY291bnQuLiksIGJpbndpZHRoID0gNSwgY29sPSJyZWQiKSArDQogIHNjYWxlX2ZpbGxfZ3JhZGllbnQoIkNvdW50IiwgbG93ID0gImdyZWVuIiwgaGlnaCA9ICJyZWQiKQ0KYGBgDQpUbyBtYWtlIGl0IGVhc3kgdG8gc2VlIHRoZSB1bnVzdWFsIHZhbHVlcywgd2UgbmVlZCB0byB6b29tIGludG8gdG8gc21hbGwgdmFsdWVzIG9mIHRoZSB5LWF4aXMgd2l0aCBjb29yZF9jYXJ0ZXNpYW4oKToNCmBgYHtyfQ0KZ2dwbG90KGRhdGEgPSBkZikgKw0KICBnZW9tX2hpc3RvZ3JhbShtYXBwaW5nID0gYWVzKHggPSBob3Vycy5wZXIud2VlaykpICsNCiAgY29vcmRfY2FydGVzaWFuKHlsaW0gPSBjKDAsIDEwMCksIHhsaW0gPSBjKDkwLCAxMDApKQ0KYGBgDQojI0NPVkFSSUFUSU9ODQpWYXJpYXRpb24gZGVzY3JpYmVzIHRoZSBiZWhhdmlvciB3aXRoaW4gYSB2YXJpYWJsZSwgY292YXJpYXRpb24gZGVzY3JpYmVzIHRoZSBiZWhhdmlvciBiZXR3ZWVuIHZhcmlhYmxlcy4gIA0KDQojIyNBIGNhdGVnb3JpY2FsIGFuZCBjb250aW51b3VzIHZhcmlhYmxlDQp0byBkaXNwbGF5IHRoZSBkaXN0cmlidXRpb24gb2YgYSBjb250aW51b3VzIHZhcmlhYmxlIGJyb2tlbiBkb3duIGJ5IGEgY2F0ZWdvcmljYWwgdmFyaWFibGUgaXMgdGhlIGJveHBsb3QuDQpgYGB7ciwgZWNobz1GQUxTRX0NCmdncGxvdChkYXRhID0gZGYpICsNCiAgZ2VvbV9ib3hwbG90KG1hcHBpbmcgPSBhZXMoeCA9IHJlb3JkZXIoZWR1Y2F0aW9uLGhvdXJzLnBlci53ZWVrLEZVTiA9IG1lZGlhbikgLCB5ID0gaG91cnMucGVyLndlZWspKSArDQogIGxhYnMoeD0nRWR1Y2F0aW9uJywgeT0nSG91cnMgcGVyIHdlZWsn
LCB0aXRsZT0nQ292YXJpYXRpb24gYmV0d2VlbiBob3VycyBhbmQgZWR1Y2F0aW9uJykgKw0KICBjb29yZF9mbGlwKCkNCmBgYA0KIyMjVHdvIGNhdGVnb3JpY2FsIHZhcmlhYmxlcw0KVG8gdmlzdWFsaXNlIHRoZSBjb3ZhcmlhdGlvbiBiZXR3ZWVuIGNhdGVnb3JpY2FsIHZhcmlhYmxlcywgeW914oCZbGwgbmVlZCB0byBjb3VudCB0aGUgbnVtYmVyIG9mIG9ic2VydmF0aW9ucyBmb3IgZWFjaCBjb21iaW5hdGlvbi4gT25lIHdheSB0byBkbyB0aGF0IGlzIHRvIHJlbHkgb24gdGhlIGJ1aWx0LWluIGdlb21fY291bnQoKToNCmBgYHtyfQ0KZ2dwbG90KGRhdGEgPSBkZikgKw0KICBnZW9tX2NvdW50KG1hcHBpbmcgPSBhZXMoeCA9IGVkdWNhdGlvbiwgeSA9IHJhY2UpKSArDQogIHRoZW1lKGF4aXMudGV4dC54PWVsZW1lbnRfdGV4dChhbmdsZT05MCxoanVzdD0xLHZqdXN0PTAuNSkpICsNCiAgY29vcmRfZmxpcCgpDQpgYGANClRoZW4gdmlzdWFsaXNlIHdpdGggZ2VvbV90aWxlKCkgYW5kIHRoZSBmaWxsIGFlc3RoZXRpYzoNCmBgYHtyfQ0KZGYgJT4lIA0KICBjb3VudChlZHVjYXRpb24sIHJhY2UpICU+JSAgDQogIGdncGxvdChtYXBwaW5nID0gYWVzKHggPSBlZHVjYXRpb24sIHkgPSByYWNlKSkgKw0KICAgIGdlb21fdGlsZShtYXBwaW5nID0gYWVzKGZpbGwgPSBuKSkgKw0KICB0aGVtZShheGlzLnRleHQueD1lbGVtZW50X3RleHQoYW5nbGU9OTAsaGp1c3Q9MSx2anVzdD0wLjUpKSArDQogIGNvb3JkX2ZsaXAoKQ0KYGBgDQoNCg==