library(ggplot2)
library(dplyr)
# Load the census data and take a first look at it.
# NOTE: the name `data` shadows utils::data(); it is kept because the rest of
# the script refers to the data frame by this name.
data <- read.csv("adult.csv")
head(data)
str(data)
summary(data)

# Cells holding exactly "?" are treated as missing: recode them to NA, then
# drop every row that contains at least one NA.
# NOTE(review): this only matches the bare string "?"; if the CSV pads values
# with spaces (" ?"), read it with strip.white = TRUE -- confirm against file.
data[data == "?"] <- NA
data <- na.omit(data)

# Convert the categorical columns used for grouping, fills and modelling.
data$sex <- as.factor(data$sex)
data$education <- as.factor(data$education)
data$income <- as.factor(data$income)
# Univariate and income-split overview plots. Each ggplot() call is a separate
# top-level expression so that every plot is printed on its own.
# bins = 30 is geom_histogram()'s default; it is spelled out so ggplot2 does
# not emit its "pick better value" message for every histogram.

# Age distribution.
ggplot(data, aes(x = age)) +
  geom_histogram(bins = 30)

# Number of records per sex.
ggplot(data, aes(x = sex, fill = sex)) +
  geom_bar()

# Number of records per education level. The original empty theme() call had
# no effect; the labels are rotated the same way as in the occupation plot so
# the level names stay readable.
ggplot(data, aes(x = education)) +
  geom_bar(fill = "orange") +
  theme(axis.text.x = element_text(angle = 90))

# Number of records per income class.
ggplot(data, aes(x = income, fill = income)) +
  geom_bar()

# Age distribution with the income classes drawn side by side in each bin.
ggplot(data, aes(x = age, fill = income)) +
  geom_histogram(position = "dodge", bins = 30)

# Cross-tabulation of sex (rows) against income class (columns).
table(data$sex, data$income)

# Share of each income class within every education level
# (position = "fill" scales each bar to a total of 1).
ggplot(data, aes(x = education, fill = income)) +
  geom_bar(position = "fill")

# Distribution of weekly working hours.
ggplot(data, aes(x = hours.per.week)) +
  geom_histogram(fill = "green", bins = 30)
# Record count per occupation, with the x-axis labels turned by 90 degrees.
ggplot(data = data, mapping = aes(x = occupation)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90))

# Frequency of each native country.
table(data[["native.country"]])

# Correlation between age and weekly working hours (cor() default method).
with(data, cor(age, hours.per.week))

# Age by income class.
ggplot(data = data, mapping = aes(x = income, y = age)) +
  geom_boxplot()

# Age against weekly working hours, points coloured by income class.
ggplot(
  data = data,
  mapping = aes(x = hours.per.week, y = age, colour = income)
) +
  geom_point()

# Weekly working hours by education level.
ggplot(data = data, mapping = aes(x = education, y = hours.per.week)) +
  geom_boxplot()
# Compare the mean age of the two income groups.
high_income <- data %>%
  filter(income == ">50K")

# `low_income` was used below without ever being defined, which stops the
# script with "object 'low_income' not found". It is defined here as the
# complement of the high-income group, i.e. every row whose income is not
# ">50K".
low_income <- data %>%
  filter(income != ">50K")

mean(high_income$age)
mean(low_income$age)
# Linear regression of age on weekly working hours.
model <- lm(age ~ hours.per.week, data = data)
summary(model)

# Logistic regression of income class on age, education level and weekly
# working hours. With a factor response, a binomial glm() treats the first
# factor level as "failure" and all other levels as "success".
model2 <- glm(
  income ~ age + education + hours.per.week,
  data = data,
  family = "binomial"
)
summary(model2)