The dataset I have chosen this week is the American Housing Survey (AHS) from 2015. The dataset contains information regarding housing/renting issues in the United States. I will be examining how the dependent variable, tenure, varies based on various independent variables.
There are several factors affecting who is able to own their home rather than rent. Some people believe owning their home comes with good bragging rights; while this may be true, the ability to own a home varies. Not everyone has the same resources to be able to do this. Of course, others may prefer the ease of renting their home as well. I will be examining different factors that may affect who owns/rents their home.
I will be focusing on the variables:
1. TENURE (tenure): Whether the respondent owns or rents their home (1 = Own, 2 = Rent).
2. HHSEX (gender): The gender of the respondent (1 = Male, 2 = Female).
3. HHGRAD (education): The education level of the respondent.
(1 = Below High School, 2 = Some High School, 3 = High School or equivalent, 4 = Some College, 5 = Associate’s/Vocational/Technical/Trade, 6 = BA/BS, 7 = Master’s/Professional Degree/Doctoral Degree)
4. HHAGE (age): The age of the respondents.
5. HHCITSHP (citizenship): The citizenship status of the respondent (1 = Natural Born Citizen, 2 = Naturalized Citizen, Foreign born, 3 = Non-citizen, Foreign born)
6. HINCP (income): Household income of respondent.
library(dplyr)
library(ggplot2)
library(cowplot)
library(ggridges)
library(ggstance)
library(gganimate)
library(lattice)
library(readr)
# Load the AHS extract from CSV into the data frame `ahs`.
# NOTE(review): the path is absolute and machine-specific; it must be
# adjusted before the script can run on another computer.
ahs <- read.csv(
  file = "/Users/rachel_ramphal/Documents/Data Sets/ahs.csv"
)
library(dplyr)
# Recode the raw AHS columns into analysis variables and keep only the rows
# with a known tenure.
#
# Columns added: tenure, education (factor, levels in the order listed
# below), citizenship (factor), age, gender, income. Any raw code that does
# not match one of the conditions below becomes NA.
ahs1 <- ahs %>%
  mutate(
    # NOTE(review): code 3 is grouped with code 1 as "Own" -- confirm
    # against the AHS codebook that this grouping is intended.
    tenure = case_when(
      TENURE %in% c(1, 3) ~ "Own",
      TENURE == 2 ~ "Rent",
      TRUE ~ NA_character_
    ),
    education = case_when(
      # The lower bound is inclusive, like every other range here, so that
      # code 31 is not turned into NA.
      # NOTE(review): assumes 31 is a valid HHGRAD code -- confirm against
      # the codebook.
      HHGRAD >= 31 & HHGRAD <= 34 ~ "Less than HS",
      HHGRAD >= 35 & HHGRAD <= 38 ~ "Some HS",
      HHGRAD == 39 ~ "High School",
      HHGRAD == 40 ~ "Some College",
      HHGRAD >= 41 & HHGRAD <= 43 ~ "Associate's/Technical/Trade/Vocational",
      HHGRAD == 44 ~ "BA/BS",
      HHGRAD >= 45 & HHGRAD <= 47 ~ "Master's/Doctoral/Professional Degree",
      TRUE ~ NA_character_
    ),
    # Fix the level order explicitly so charts do not sort the categories
    # alphabetically.
    education = factor(
      education,
      levels = c(
        "Less than HS",
        "Some HS",
        "High School",
        "Some College",
        "Associate's/Technical/Trade/Vocational",
        "BA/BS",
        "Master's/Doctoral/Professional Degree"
      )
    ),
    HHCITSHP = as.numeric(HHCITSHP),
    citizenship = case_when(
      HHCITSHP >= 1 & HHCITSHP <= 3 ~ "Natural-born Citizen",
      HHCITSHP == 4 ~ "Naturalized Citizen",
      HHCITSHP == 5 ~ "Non-Citizen",
      TRUE ~ NA_character_
    ),
    citizenship = factor(citizenship),
    HHAGE = as.numeric(HHAGE),
    # -6 is treated as a missing-value code for age.
    age = na_if(HHAGE, -6),
    gender = case_when(
      HHSEX == 1 ~ "Male",
      HHSEX == 2 ~ "Female",
      TRUE ~ NA_character_
    ),
    HINCP = as.numeric(HINCP),
    # Negative household income values are treated as missing.
    income = if_else(HINCP < 0, NA_real_, HINCP)
  ) %>%
  filter(!is.na(tenure))
library(ggplot2)
# Bar chart of the share of owners and renters within each gender, drawn as
# one panel per tenure category.
tenure_gender <- ahs1 %>%
  # Drop rows without a recoded gender so no NA bar is drawn.
  filter(!is.na(gender)) %>%
  count(gender, tenure) %>%
  # Group explicitly so the shares sum to 100 within each gender, instead of
  # relying on summarise() silently dropping the last grouping level.
  group_by(gender) %>%
  # Scale to 0-100 so the plotted values match the "Percent" axis label.
  mutate(percent = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = gender, y = percent, fill = tenure)) +
  facet_wrap(~tenure) +
  labs(title = "Tenure by Gender", x = "Gender", y = "Percent") +
  scale_fill_brewer(palette = "Accent")
tenure_gender
# Box plot comparing the age distribution of the two tenure groups.
tenure_age <- ggplot(data = ahs1, mapping = aes(x = tenure, y = age)) +
  geom_boxplot(fill = "violetred2", color = "seagreen4") +
  labs(
    title = "Tenure by Age",
    x = "Tenure",
    y = "Age"
  )
tenure_age
library(ggplot2)
# Horizontal stacked bar chart of the owner/renter split within each
# education level.
tenure_educ <- ahs1 %>%
  filter(!is.na(education)) %>%
  count(education, tenure) %>%
  # Group explicitly so the shares sum to 100 within each education level,
  # instead of relying on summarise() silently dropping the last group.
  group_by(education) %>%
  # Scale to 0-100 so the plotted values match the "Percent (%)" axis label.
  mutate(percent = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(aes(x = education, y = percent, fill = tenure)) +
  # Flip the axes so the long education labels are drawn horizontally.
  coord_flip() +
  labs(
    title = "Tenure by Education",
    x = "Education Level",
    y = "Percent (%)"
  ) +
  scale_fill_brewer(palette = "Paired")
tenure_educ
# Side-by-side bar chart of the owner/renter split within each citizenship
# status.
tenure_cit <- ahs1 %>%
  filter(!is.na(citizenship)) %>%
  count(citizenship, tenure) %>%
  # Group explicitly so the shares sum to 100 within each citizenship
  # status, instead of relying on summarise() silently dropping a group.
  group_by(citizenship) %>%
  # Scale to 0-100 so the plotted values match the "Percent (%)" axis label.
  mutate(percent = 100 * n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(
    aes(x = citizenship, y = percent, fill = tenure),
    position = "dodge"
  ) +
  labs(
    title = "Tenure by Citizenship Status",
    x = "Citizenship Status",
    y = "Percent (%)"
  ) +
  scale_fill_brewer(palette = "Set1")
tenure_cit
# Violin plot of the household income distribution for each tenure group.
#
# The plot is built from the row-level data: collapsing to one row per
# distinct income/tenure pair first would make the violin show the density
# of distinct income values rather than of households.
tenure_income <- ahs1 %>%
  # Rows with missing income cannot contribute to the density estimate.
  filter(!is.na(income)) %>%
  ggplot(aes(x = tenure, y = income)) +
  geom_violin(fill = "green4", size = 0.4) +
  labs(title = "Tenure by Income", x = "Tenure", y = "Income ($)")
tenure_income
# Scatter plot of income against age, with one panel per tenure group.
tenure_inc_age <- ggplot(data = ahs1, mapping = aes(x = age, y = income)) +
  geom_point(
    shape = 23,
    size = 3,
    fill = "cornflowerblue"
  ) +
  facet_wrap(~tenure) +
  labs(
    title = "Tenure by Income & Age",
    x = "Age",
    y = "Income"
  )
tenure_inc_age