Please indicate
Perform an Exploratory Data Analysis (EDA) on the profiles data set, specifically on the relationship between gender and
income, job, and pet owners, all while keeping in mind that in HW-3 you will be fitting a logistic regression to predict a user’s gender based on these variables.
profiles <- profiles %>%
mutate(is_female = ifelse(sex=="f", 1, 0))
# Share of profiles by sex ----
# One row per sex: `gender` holds the head count and `prop` that count's
# share of all rows.
gender <- profiles %>%
  mutate(sex = fct_recode(sex, "Female" = "f", "Male" = "m")) %>%
  count(sex) %>%
  rename(gender = n) %>%
  mutate(prop = prop.table(gender))

# Bar chart of the shares computed above.
ggplot(data = gender, mapping = aes(x = sex, y = prop)) +
  labs(title = "Proportion by Sex on OkCupid", x = "Sex", y = "Proportion") +
  theme_minimal() +
  geom_bar(
    stat = "identity",
    position = "dodge",
    width = .65,
    color = "darkslateblue",
    fill = "cadetblue2"
  )
# Cleaning variables ----

# Keep the columns of interest and add `petowners`, which collapses the
# detailed `pets` answers into four statuses. Only "has ..." answers count
# as ownership; answers that merely like/dislike an animal map to "neither".
petowners <- profiles %>%
  select(sex, income, job, pets, height, age, body_type, sign) %>%
  mutate(
    petowners = fct_collapse(
      pets,
      # new level = c(old levels)
      dogs = c(
        "has dogs",
        "has dogs and dislikes cats",
        "has dogs and likes cats"
      ),
      both = "has dogs and has cats",
      cats = c(
        "has cats",
        "dislikes dogs and has cats",
        "likes dogs and has cats"
      ),
      neither = c(
        "dislikes cats",
        "dislikes dogs",
        "dislikes dogs and dislikes cats",
        "dislikes dogs and likes cats",
        "likes cats",
        "likes dogs",
        "likes dogs and dislikes cats",
        "likes dogs and likes cats"
      )
    )
  )

# Spell out the sex codes.
people <- petowners %>%
  mutate(sex = fct_recode(sex, "female" = "f", "male" = "m"))

# Fold the listed job categories into a single "other" level.
astrology <- people %>%
  mutate(
    job = fct_collapse(
      job,
      other = c(
        "military",
        "unemployed",
        "transportation",
        "retired",
        "rather not say",
        "political / government",
        "clerical / administrative",
        "hospitality / travel",
        "law / legal services",
        "construction / craftsmanship"
      )
    )
  )
# Jobs variable ----

# One row per job x sex cell:
#   counted     - number of users in the cell
#   prob        - joint share of the cell among all users
#   conditional - share of the cell within its sex, i.e. P(job | sex)
# The per-sex totals are taken from the data itself instead of a hard-coded
# female share, so the result stays correct if the rows in `astrology` change.
filtered <- astrology %>%
  count(job, sex) %>%
  ungroup() %>%
  rename(counted = n) %>%
  mutate(prob = counted / sum(counted)) %>%
  group_by(sex) %>%
  mutate(conditional = counted / sum(counted)) %>%
  ungroup()

# Raw head counts per job, split by sex.
ggplot(
  filtered,
  aes(x = fct_reorder(job, counted, na.rm = TRUE), y = counted, fill = sex)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Jobs by Sex", y = "Number of Individuals", x = "Job Description")

# Within-sex job distribution. The plotted quantity is P(job | sex), so the
# title states that conditioning direction.
ggplot(
  filtered,
  aes(
    x = fct_reorder(job, conditional, median, na.rm = TRUE),
    y = conditional,
    fill = sex
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Conditional Probability of Job Given Sex",
    y = "Conditional Probability",
    x = "Job Description"
  )
# Petowners variable ----

# One row per pet status x sex cell:
#   dogsandcats - number of users in the cell
#   prob        - joint share of the cell among all users
#   conditional - share of the cell within its sex, i.e. P(pet status | sex)
#   prop        - share of the cell within its pet status
# Both conditional shares use explicit grouping and totals taken from the
# data, rather than a hard-coded female share and leftover grouping.
dog <- astrology %>%
  count(petowners, sex) %>%
  ungroup() %>%
  rename(dogsandcats = n) %>%
  mutate(prob = dogsandcats / sum(dogsandcats)) %>%
  group_by(sex) %>%
  mutate(conditional = dogsandcats / sum(dogsandcats)) %>%
  group_by(petowners) %>%
  mutate(prop = dogsandcats / sum(dogsandcats)) %>%
  ungroup()

# Sex composition within each pet status (bars are filled to 100%).
ggplot(
  dog,
  aes(
    x = fct_reorder(petowners, dogsandcats, median, na.rm = TRUE),
    y = dogsandcats,
    fill = sex
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill",
    color = "violetred4",
    width = .5
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Pet Ownership",
    y = "Proportion Male and Female",
    x = "Pet Owner Status"
  )

# Within-sex distribution over pet statuses.
ggplot(
  dog,
  aes(
    x = fct_reorder(petowners, conditional, median, na.rm = TRUE),
    y = conditional,
    fill = sex
  )
) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    color = "violetred4",
    width = .5
  ) +
  coord_flip() +
  labs(
    title = "Conditional Probability of Pet Ownership",
    y = "Conditional Probability",
    x = "Pet Owner Status"
  )
# Income variable ----

# One row per income x sex cell. `tally()` leaves the table grouped by
# income, so `sum` is the number of users reporting that income (both sexes)
# and `names` is the axis label "<income>: (n = <sum>)".
income <- astrology %>%
  group_by(income, sex) %>%
  tally() %>%
  rename(incomestats = n) %>%
  mutate(
    sum = sum(incomestats),
    names = paste0(income, ": (n = ", sum, ")")
  )

# Sex composition within each income value; labels are ordered by income.
ggplot(
  data = income,
  mapping = aes(x = fct_reorder(names, income), y = incomestats, fill = sex)
) +
  labs(title = "Income", y = "Proportion Male and Female", x = "Income in USD") +
  theme_minimal() +
  geom_bar(
    stat = "identity",
    position = "fill",
    color = "violetred4",
    width = .6
  ) +
  coord_flip()
In the file HW-2_Shiny_App.Rmd, build the Shiny App discussed in Lec09 on Monday 10/3: Using the movies data set in the ggplot2movies package, make a Shiny app that,
instead of offering a radio button for each individual genre (Action, Animation, Comedy, etc.), has a radio button that allows you to toggle between comedies and non-comedies. This app should be simpler.