Administrative:

Please indicate

  • Who you collaborated with: Trisha, Bianca
  • Roughly how much time you spent on this HW so far: 7 hours
  • The URL of the published RPubs page here.
  • What gave you the most trouble: Conditional probabilities
  • Any comments you have: I could not figure out how to add a median line or how to change the colors of the graphs that have fill = sex.

Question 1:

Perform an Exploratory Data Analysis (EDA) on the profiles data set, specifically on the relationship between gender and

  • income
  • job
  • petowners, all while keeping in mind that in HW-3 you will be fitting a logistic regression to predict a user’s gender based on these variables.
# Add a 0/1 indicator for female profiles; rows with a missing `sex` stay NA.
profiles <- mutate(profiles, is_female = as.numeric(sex == "f"))


# Count profiles per sex (with readable labels) and convert the counts to
# proportions of all profiles.
gender <- profiles %>%
  mutate(sex = fct_recode(sex, Female = "f", Male = "m")) %>%
  group_by(sex) %>%
  summarise(gender = n()) %>%
  mutate(prop = gender / sum(gender))

# Bar chart of the share of profiles belonging to each sex.
ggplot(gender) +
  geom_bar(
    aes(x = sex, y = prop),
    stat = "identity",
    position = "dodge",
    fill = "cadetblue2",
    color = "darkslateblue",
    width = 0.65
  ) +
  theme_minimal() +
  labs(
    title = "Proportion by Sex on OkCupid",
    x = "Sex",
    y = "Proportion"
  )

# cleaning variables

# Keep only the columns used below and collapse the detailed `pets` answers
# into four ownership groups. Answers that only state likes/dislikes (no "has")
# are treated as owning neither.
petowners <- profiles %>%
  select(sex, income, job, pets, height, age, body_type, sign) %>%
  mutate(
    petowners = fct_collapse(
      pets,
      # new level = old levels folded into it
      dogs = c(
        "has dogs",
        "has dogs and dislikes cats",
        "has dogs and likes cats"
      ),
      both = "has dogs and has cats",
      cats = c(
        "has cats",
        "dislikes dogs and has cats",
        "likes dogs and has cats"
      ),
      neither = c(
        "dislikes cats",
        "dislikes dogs",
        "dislikes dogs and dislikes cats",
        "dislikes dogs and likes cats",
        "likes cats",
        "likes dogs",
        "likes dogs and dislikes cats",
        "likes dogs and likes cats"
      )
    )
  )

# Spell out the sex codes so plot legends read "male"/"female".
people <- mutate(
  petowners,
  sex = fct_recode(sex, male = "m", female = "f")
)
  

# Fold the job categories listed below into the "other" level so the job
# plots have fewer bars.
astrology <- people %>%
  mutate(
    job = fct_collapse(
      job,
      other = c(
        "military",
        "unemployed",
        "transportation",
        "retired",
        "rather not say",
        "political / government",
        "clerical / administrative",
        "hospitality / travel",
        "law / legal services",
        "construction / craftsmanship"
      )
    )
  )


# Jobs variable

# Count profiles in each job x sex cell.
#   counted     - number of profiles in the cell
#   prob        - joint share of all profiles that fall in the cell
#   conditional - share of each sex that holds the job, i.e. P(job | sex)
filtered <- astrology %>%
  count(job, sex) %>%
  rename(counted = n) %>%
  mutate(prob = counted / sum(counted)) %>%
  # Divide by the per-sex totals taken from the data rather than a hard-coded
  # proportion of women, so the result stays correct if the data change.
  group_by(sex) %>%
  mutate(conditional = counted / sum(counted)) %>%
  # Leave the table grouped by job, as the earlier group_by()/tally() version did.
  group_by(job)

# Raw counts per job, split by sex; jobs ordered by their median cell count.
ggplot(
  filtered,
  aes(x = fct_reorder(job, counted, na.rm = TRUE), y = counted, fill = sex)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Jobs by Sex",
    y = "Number of Individuals",
    x = "Job Description"
  )

# Share of each sex holding each job. The plotted quantity is P(job | sex),
# so the title states that direction of conditioning.
ggplot(
  filtered,
  aes(
    x = fct_reorder(job, conditional, median, na.rm = TRUE),
    y = conditional,
    fill = sex
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Conditional Probability of Job Given Sex",
    y = "Conditional Probability",
    x = "Job Description"
  )

# Petowners variable

# Count profiles in each pet-status x sex cell.
#   dogsandcats - number of profiles in the cell
#   prob        - joint share of all profiles that fall in the cell
#   conditional - share of each sex with that pet status, P(pet status | sex)
#   prop        - share of each pet status that is each sex, P(sex | pet status)
dog <- astrology %>%
  count(petowners, sex) %>%
  rename(dogsandcats = n) %>%
  mutate(prob = dogsandcats / sum(dogsandcats)) %>%
  # Use per-sex totals from the data instead of a hard-coded proportion of
  # women, so the result stays correct if the data change.
  group_by(sex) %>%
  mutate(conditional = dogsandcats / sum(dogsandcats)) %>%
  # Regroup by pet status: `prop` is computed within each status, and the table
  # stays grouped by `petowners` as before.
  group_by(petowners) %>%
  mutate(prop = dogsandcats / sum(dogsandcats))

# Male/female split within each pet-ownership status (bars fill to 1).
ggplot(
  dog,
  aes(
    x = fct_reorder(petowners, dogsandcats, median, na.rm = TRUE),
    y = dogsandcats,
    fill = sex
  )
) +
  geom_bar(
    stat = "identity",
    position = "fill",
    color = "violetred4",
    width = 0.5
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Pet Ownership",
    y = "Proportion Male and Female",
    x = "Pet Owner Status"
  )

# Share of each sex in each pet-ownership status, side by side.
ggplot(
  dog,
  aes(
    x = fct_reorder(petowners, conditional, median, na.rm = TRUE),
    y = conditional,
    fill = sex
  )
) +
  geom_bar(
    stat = "identity",
    position = "dodge",
    color = "violetred4",
    width = 0.5
  ) +
  coord_flip() +
  # Same theme as the other plots in this analysis.
  theme_minimal() +
  labs(
    title = "Conditional Probability of Pet Ownership",
    y = "Conditional Probability",
    x = "Pet Owner Status"
  )

# income variable

# Count profiles in each income x sex cell. Within each income level, `sum` is
# the total number of profiles and `names` is an axis label that shows it.
income <- astrology %>%
  group_by(income, sex) %>%
  tally() %>%
  rename(incomestats = n) %>%
  mutate(
    sum = sum(incomestats),
    names = paste0(income, ": (n = ", sum, ")")
  )

# Male/female split within each income level, with labels ordered by income.
ggplot(income) +
  geom_bar(
    aes(x = fct_reorder(names, income), y = incomestats, fill = sex),
    stat = "identity",
    position = "fill",
    width = 0.6,
    color = "violetred4"
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Income",
    y = "Proportion Male and Female",
    x = "Income in USD"
  )

Question 2:

In the file HW-2_Shiny_App.Rmd, build the Shiny App discussed in Lec09 on Monday 10/3: Using the movies data set in the ggplot2movies package, make a Shiny app that

  • Plots budget on the x-axis and rating on the y-axis
  • Instead of having a radio button to select the genre of movie (Action, Animation, Comedy, etc), have a radio button that allows you to toggle between comedies and non-comedies. This app should be simpler.