The dataset by proposed by Sunny Mehta, the details can be foundhere.
From the histogram, we can see that the overall distribution of rental prices is right skewed. if we then look and the ranked barplot, we see that a few locations have much higher rent prices than the rest of the country.
If we look at the yearly changes we find that New York experienced the sharpest drop in prices (-17.5%) and Wyoming increased the most (8.8%).
suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))
suppressMessages(library(RMySQL))
mdb <- DBI::dbConnect(RMySQL::MySQL(),
dbname='project2db',
host = "35.224.164.217",
user = "root",
password = "2Password"#rstudioapi::askForPassword("Database password")
)
#code borrowed from https://dbplyr.tidyverse.org/articles/dbplyr.html
sql <- "SELECT * FROM project2db.RentPrice"
medrent <-
dbGetQuery(mdb, sql) %>%
gather(Month,Rent,3:14) %>% #Gather months into rows
mutate(Month = as.Date(paste(Month,"-01",sep=""))) %>% #Convert strings to dates
mutate(Month = month(Month,label = TRUE, abbr = FALSE)) #Convert dates to month
head(medrent)
## RegionName SizeRank Month Rent
## 1 California 1 January 1.8674136
## 2 Texas 2 January 0.9056244
## 3 New York 3 January 4.2682584
## 4 Florida 4 January 1.3180646
## 5 Illinois 5 January 1.2173913
## 6 Pennsylvania 6 January 1.0256410
medrent12 <- #Create a new df that includes only the latest month
medrent %>%
filter(Month == "December")
ggplot(medrent12, aes(Rent, fill = Rent)) +
ggtitle("Distribution of Median Rental Price/sqft by State (Dec 2018, Zillow)")+
geom_histogram(binwidth = 0.25, fill="steelblue") +
theme(legend.position = "none")
p<-ggplot(data=medrent12, aes(x=reorder(RegionName,Rent), y=Rent), width=.5,position = position_dodge(width = 60)) +
ggtitle("Median Rental Price/sqft by State (Dec 2018, Zillow)")+
geom_bar(stat="identity", fill="steelblue")+
theme_minimal() +
#coord_fixed(ratio = 1, xlim = NULL, ylim = NULL) +
coord_flip()
p
medrent2 <-
read_csv("State_MedianRentalPricePerSqft_AllHomes.csv", #Impor the CSV file
col_types = cols(SizeRank = col_integer())) %>%
mutate(ychange = ((`2018-12`/`2018-01`)-1)) %>%
select (-c(3:14))
head(medrent2)
## # A tibble: 6 x 3
## RegionName SizeRank ychange
## <chr> <int> <dbl>
## 1 California 1 0.0359
## 2 Texas 2 0.0289
## 3 New York 3 -0.175
## 4 Florida 4 0.0307
## 5 Illinois 5 0.0298
## 6 Pennsylvania 6 0.0156
p<-ggplot(data=medrent2, aes(x=reorder(RegionName,ychange), y=ychange), width=.5) +
ggtitle("% Chg in Median Rental Price/sqft by State (2018, Zillow)")+
geom_bar(stat="identity", fill="steelblue")+
theme_minimal() +
#coord_fixed(ratio = 1, xlim = NULL, ylim = NULL) +
coord_flip() +
scale_y_continuous(labels = scales::percent)
p