# Packages: ggplot2 for the plots, dplyr for `%>%` and `filter()`.
library(ggplot2)
library(dplyr)

# Read adult.csv from the working directory, then inspect it.
# The data frame must exist before head()/str()/summary() are called on it.
data <- read.csv("adult.csv")
head(data)
str(data)
summary(data)

# Cells holding the literal string "?" are treated as missing: recode them to
# NA, then drop every row that contains at least one NA.
data[data == "?"] <- NA
data <- na.omit(data)

# Convert the categorical columns used below to factors. This runs after the
# NA removal, so "?" never becomes a factor level in these columns.
data$sex <- as.factor(data$sex)
data$education <- as.factor(data$education)
data$income <- as.factor(data$income)

# Distribution of age (geom_histogram() uses its default binning).
ggplot(data, aes(x = age)) +
  geom_histogram()

# Row counts per sex, one fill colour per level.
ggplot(data, aes(x = sex, fill = sex)) +
  geom_bar()

# Row counts per education level. theme() with no arguments changes nothing;
# it is kept as a placeholder for later tweaks.
ggplot(data, aes(x = education)) +
  geom_bar(fill = "orange") +
  theme()

# Row counts per income class.
ggplot(data, aes(x = income, fill = income)) +
  geom_bar()

# Age histogram split by income class, bars placed side by side.
ggplot(data, aes(x = age, fill = income)) +
  geom_histogram(position = "dodge")

# Cross-tabulation of sex (rows) against income (columns).
table(data$sex, data$income)

# Share of each income class within every education level:
# position = "fill" rescales each bar to height 1.
ggplot(data, aes(x = education, fill = income)) +
  geom_bar(position = "fill")

# Distribution of weekly working hours.
ggplot(data, aes(x = hours.per.week)) +
  geom_histogram(fill = "green")

# Row counts per occupation; x-axis labels are rotated 90 degrees.
ggplot(data, aes(x = occupation)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90))

# Frequency table of the native.country column.
table(data[["native.country"]])

# Correlation between age and weekly hours (cor() defaults to Pearson).
cor(data[["age"]], data[["hours.per.week"]])

# Age by income class.
ggplot(data, aes(x = income, y = age)) +
  geom_boxplot()

# Age against weekly hours, points coloured by income class.
ggplot(data, aes(x = hours.per.week, y = age, color = income)) +
  geom_point()

# Weekly hours by education level.
ggplot(data, aes(x = education, y = hours.per.week)) +
  geom_boxplot()

# Split the rows into the ">50K" income class and everything else.
# `low_income` was previously read without ever being defined; it is built
# here as the complement of `high_income`, so no second label is assumed.
high_income <- data %>%
  filter(income == ">50K")
low_income <- data %>%
  filter(income != ">50K")

# Mean age within each income group.
mean(high_income$age)
mean(low_income$age)

# Linear regression of age on weekly working hours.
model <- lm(age ~ hours.per.week, data = data)
summary(model)

# Logistic regression of income on age, education and weekly working hours.
# With a binomial family and a factor response, glm() treats the first factor
# level as failure and all other levels as success.
model2 <- glm(
  income ~ age + education + hours.per.week,
  data = data,
  family = "binomial"
)
summary(model2)