The dataset I have chosen this week is the American Housing Survey (AHS) from 2015. The dataset contains information regarding housing/renting issues in the United States. I will be examining how the dependent variable, tenure, varies across several independent variables.

There are several factors affecting who is able to own their home rather than rent. Some people believe owning their home comes with good bragging rights. While this may be true, the ability to own a home varies: not everyone has the same resources to be able to do this. Of course, others may prefer the ease of renting their home as well. I will be examining different factors that may affect who owns/rents their home.

I will be focusing on the variables:
1. TENURE (tenure): Whether the respondent owns or rents their home (1 = Own, 2 = Rent).
2. HHSEX (gender): The gender of the respondent (1 = Male, 2 = Female).
3. HHGRAD (education): The education level of the respondent.
(1 = Below High School, 2 = Some High School, 3 = High School or equivalent, 4 = Some College, 5 = Associate’s/Vocational/Technical/Trade, 6 = BA/BS, 7 = Master’s/Professional Degree/Doctoral Degree)
4. HHAGE (age): The age of the respondents.
5. HHCITSHP (citizenship): The citizenship status of the respondent (1 = Natural Born Citizen, 2 = Naturalized Citizen, Foreign born, 3 = Non-citizen, Foreign born)
6. HINCP (income): Household income of respondent.

library(dplyr)
library(ggplot2)
library(cowplot)
library(ggridges)
library(ggstance)
library(gganimate)
library(lattice)

Reading in Data

# Load the survey extract from its CSV file into the data frame `ahs`.
# The file is read with `read.csv()`; readr is attached here but is not
# called by this statement.
library(readr)
ahs <- read.csv(
  file = "/Users/rachel_ramphal/Documents/Data Sets/ahs.csv"
)

Recoding Variables

library(dplyr)

# Build the analysis data set `ahs1` from the raw survey columns in `ahs`.
# Each raw code is collapsed into a labelled category, and any code that is not
# listed below becomes NA. Rows without a usable tenure value are dropped at
# the end of the pipeline.
ahs1 <- ahs %>%
  mutate(
    # NOTE(review): code 3 is folded into "Own" together with code 1 --
    # confirm against the AHS codebook that this is the intended treatment.
    tenure = case_when(
      TENURE == 1 ~ "Own",
      TENURE == 2 ~ "Rent",
      TENURE == 3 ~ "Own"
    ),

    # Every band is inclusive at both ends. The lowest band starts at 31 so
    # that code 31 is classified instead of silently falling through to NA.
    education = case_when(
      HHGRAD >= 31 & HHGRAD <= 34 ~ "Less than HS",
      HHGRAD >= 35 & HHGRAD <= 38 ~ "Some HS",
      HHGRAD == 39 ~ "High School",
      HHGRAD == 40 ~ "Some College",
      HHGRAD >= 41 & HHGRAD <= 43 ~ "Associate's/Technical/Trade/Vocational",
      HHGRAD == 44 ~ "BA/BS",
      HHGRAD >= 45 & HHGRAD <= 47 ~ "Master's/Doctoral/Professional Degree"
    ),

    # Fix the level order explicitly so charts list education from lowest to
    # highest rather than alphabetically.
    education = factor(
      education,
      levels = c(
        "Less than HS",
        "Some HS",
        "High School",
        "Some College",
        "Associate's/Technical/Trade/Vocational",
        "BA/BS",
        "Master's/Doctoral/Professional Degree"
      )
    ),

    HHCITSHP = as.numeric(HHCITSHP),
    citizenship = case_when(
      HHCITSHP >= 1 & HHCITSHP <= 3 ~ "Natural-born Citizen",
      HHCITSHP == 4 ~ "Naturalized Citizen",
      HHCITSHP == 5 ~ "Non-Citizen"
    ),
    citizenship = factor(citizenship),

    # -6 is treated as a missing-value code for age.
    HHAGE = as.numeric(HHAGE),
    age = ifelse(HHAGE == -6, NA, HHAGE),

    gender = case_when(
      HHSEX == 1 ~ "Male",
      HHSEX == 2 ~ "Female"
    ),

    # Negative incomes are treated as missing.
    HINCP = as.numeric(HINCP),
    income = ifelse(HINCP < 0, NA, HINCP)
  ) %>%
  filter(!is.na(tenure))

Tenure By Gender

library(ggplot2)

# Bar chart of the share of owners and renters within each gender, with one
# facet per tenure category.
tenure_gender <- ahs1 %>%
  # Drop records with no gender so they do not appear as an "NA" bar.
  filter(!is.na(gender)) %>%
  count(gender, tenure) %>%
  # Shares are computed within gender and scaled to 0-100 so the values match
  # the "Percent" axis label.
  group_by(gender) %>%
  mutate(percent = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = gender, y = percent, fill = tenure)) +
  facet_wrap(~tenure) +
  labs(title = "Tenure by Gender", x = "Gender", y = "Percent") +
  scale_fill_brewer(palette = "Accent")

tenure_gender

Tenure by Age

# Side-by-side box plots of respondent age, one box per tenure category.
tenure_age <- ggplot(data = ahs1, mapping = aes(x = tenure, y = age)) +
  geom_boxplot(fill = "violetred2", color = "seagreen4") +
  labs(
    title = "Tenure by Age",
    x = "Tenure",
    y = "Age"
  )

tenure_age

Tenure by Education

library(ggplot2)

# Horizontal stacked bar chart of the owner/renter split within each
# education level.
tenure_educ <- ahs1 %>%
  filter(!is.na(education)) %>%
  count(education, tenure) %>%
  # Shares are computed within education level and scaled to 0-100 so the
  # values match the "Percent (%)" axis label.
  group_by(education) %>%
  mutate(percent = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = education, y = percent, fill = tenure)) +
  # Flip the axes so the long education labels stay readable.
  coord_flip() +
  labs(
    title = "Tenure by Education",
    x = "Education Level",
    y = "Percent (%)"
  ) +
  scale_fill_brewer(palette = "Paired")

tenure_educ

Tenure By Citizenship Status

# Grouped bar chart of the owner/renter split within each citizenship status.
tenure_cit <- ahs1 %>%
  filter(!is.na(citizenship)) %>%
  count(citizenship, tenure) %>%
  # Shares are computed within citizenship status and scaled to 0-100 so the
  # values match the "Percent (%)" axis label.
  group_by(citizenship) %>%
  mutate(percent = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(
    aes(x = citizenship, y = percent, fill = tenure),
    position = "dodge"
  ) +
  labs(
    title = "Tenure by Citizenship Status",
    x = "Citizenship Status",
    y = "Percent (%)"
  ) +
  scale_fill_brewer(palette = "Set1")

tenure_cit

Tenure by Income

# Violin plot of income for each tenure category.
# The plot is drawn from one row per survey record, so the violin width at a
# given income reflects how many records fall there. Collapsing to one row per
# distinct income value first would give every value equal weight and distort
# the shape. Records with missing income are dropped before plotting.
tenure_income <- ahs1 %>%
  filter(!is.na(income)) %>%
  ggplot(aes(x = tenure, y = income)) +
  geom_violin(fill = "green4", size = 0.4) +
  labs(title = "Tenure by Income", x = "Tenure", y = "Income ($)")

tenure_income

Tenure by Income & Age

# Scatter plot of income against age, drawn in a separate panel for each
# tenure category.
tenure_inc_age <- ggplot(data = ahs1) +
  geom_point(
    mapping = aes(x = age, y = income),
    shape = 23,
    size = 3,
    fill = "cornflowerblue"
  ) +
  facet_wrap(~tenure) +
  labs(
    title = "Tenure by Income & Age",
    x = "Age",
    y = "Income"
  )

tenure_inc_age

All Charts