The dataset by proposed by Sunny Mehta, the details can be foundhere.

From the histogram, we can see that the overall distribution of rental prices is right skewed. if we then look and the ranked barplot, we see that a few locations have much higher rent prices than the rest of the country.

If we look at the yearly changes we find that New York experienced the sharpest drop in prices (-17.5%) and Wyoming increased the most (8.8%).

Import and Tidying

suppressMessages(library(tidyverse))
suppressMessages(library(lubridate))
suppressMessages(library(RMySQL))
mdb <- DBI::dbConnect(RMySQL::MySQL(),
  dbname='project2db',
  host = "35.224.164.217",
  user = "root",
  password = "2Password"#rstudioapi::askForPassword("Database password")
)

#code borrowed from https://dbplyr.tidyverse.org/articles/dbplyr.html

Define SQL query to extract required data

sql <- "SELECT * FROM project2db.RentPrice"

Data is retreived and loaded into a data frame

medrent <- 
  dbGetQuery(mdb, sql) %>% 
  gather(Month,Rent,3:14) %>% #Gather months into rows
  mutate(Month = as.Date(paste(Month,"-01",sep=""))) %>% #Convert strings to dates 
  mutate(Month = month(Month,label = TRUE, abbr = FALSE)) #Convert dates to month

head(medrent)
##     RegionName SizeRank   Month      Rent
## 1   California        1 January 1.8674136
## 2        Texas        2 January 0.9056244
## 3     New York        3 January 4.2682584
## 4      Florida        4 January 1.3180646
## 5     Illinois        5 January 1.2173913
## 6 Pennsylvania        6 January 1.0256410

Find the current (latest) distribution.

medrent12 <- #Create a new df that includes only the latest month
  medrent %>% 
  filter(Month == "December")
ggplot(medrent12, aes(Rent, fill = Rent)) +
  ggtitle("Distribution of Median Rental Price/sqft by State (Dec 2018, Zillow)")+
  geom_histogram(binwidth = 0.25, fill="steelblue") +
  theme(legend.position = "none")

p<-ggplot(data=medrent12, aes(x=reorder(RegionName,Rent), y=Rent), width=.5,position = position_dodge(width = 60)) +
  ggtitle("Median Rental Price/sqft by State (Dec 2018, Zillow)")+
  geom_bar(stat="identity", fill="steelblue")+
  theme_minimal() +
  #coord_fixed(ratio = 1, xlim = NULL, ylim = NULL) +
  coord_flip() 
p

Yearly % Change by State

medrent2 <- 
  read_csv("State_MedianRentalPricePerSqft_AllHomes.csv", #Impor the CSV file
    col_types = cols(SizeRank = col_integer())) %>% 
  mutate(ychange = ((`2018-12`/`2018-01`)-1)) %>% 
  select (-c(3:14))

head(medrent2)
## # A tibble: 6 x 3
##   RegionName   SizeRank ychange
##   <chr>           <int>   <dbl>
## 1 California          1  0.0359
## 2 Texas               2  0.0289
## 3 New York            3 -0.175 
## 4 Florida             4  0.0307
## 5 Illinois            5  0.0298
## 6 Pennsylvania        6  0.0156
p<-ggplot(data=medrent2, aes(x=reorder(RegionName,ychange), y=ychange), width=.5) +
  ggtitle("% Chg in Median Rental Price/sqft by State (2018, Zillow)")+
  geom_bar(stat="identity", fill="steelblue")+
  theme_minimal() +
  #coord_fixed(ratio = 1, xlim = NULL, ylim = NULL) +
  coord_flip() +
  scale_y_continuous(labels = scales::percent)
p