library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(TSstudio)
library(tibble)
# Load the Zillow state-level time series.
# Source of data set: https://www.kaggle.com/datasets/zillow/zecon
# The data directory is held in one variable and joined with file.path(), so
# reading the file no longer changes the session's working directory.
# NOTE(review): the path is still machine-specific; point data_dir at your copy.
data_dir <- "C:/Users/StarKid/Desktop/Data_Science/Data_101/final_project/zillow_data"
state_time_series <- read.csv(file.path(data_dir, "State_time_series.csv"))
# Number of rows and columns in the raw table.
dim(state_time_series)
## [1] 13212 82
# Column names, types and leading values.
glimpse(state_time_series)
## Rows: 13,212
## Columns: 82
## $ Date <chr> "1996-04…
## $ RegionName <chr> "Alabama…
## $ DaysOnZillow_AllHomes <dbl> NA, NA, …
## $ InventorySeasonallyAdjusted_AllHomes <int> NA, NA, …
## $ InventoryRaw_AllHomes <int> NA, NA, …
## $ MedianListingPricePerSqft_1Bedroom <dbl> NA, NA, …
## $ MedianListingPricePerSqft_2Bedroom <dbl> NA, NA, …
## $ MedianListingPricePerSqft_3Bedroom <dbl> NA, NA, …
## $ MedianListingPricePerSqft_4Bedroom <dbl> NA, NA, …
## $ MedianListingPricePerSqft_5BedroomOrMore <dbl> NA, NA, …
## $ MedianListingPricePerSqft_AllHomes <dbl> NA, NA, …
## $ MedianListingPricePerSqft_CondoCoop <dbl> NA, NA, …
## $ MedianListingPricePerSqft_DuplexTriplex <dbl> NA, NA, …
## $ MedianListingPricePerSqft_SingleFamilyResidence <dbl> NA, NA, …
## $ MedianListingPrice_1Bedroom <dbl> NA, NA, …
## $ MedianListingPrice_2Bedroom <dbl> NA, NA, …
## $ MedianListingPrice_3Bedroom <dbl> NA, NA, …
## $ MedianListingPrice_4Bedroom <dbl> NA, NA, …
## $ MedianListingPrice_5BedroomOrMore <dbl> NA, NA, …
## $ MedianListingPrice_AllHomes <dbl> NA, NA, …
## $ MedianListingPrice_CondoCoop <dbl> NA, NA, …
## $ MedianListingPrice_DuplexTriplex <dbl> NA, NA, …
## $ MedianListingPrice_SingleFamilyResidence <dbl> NA, NA, …
## $ MedianPctOfPriceReduction_AllHomes <dbl> NA, NA, …
## $ MedianPctOfPriceReduction_CondoCoop <dbl> NA, NA, …
## $ MedianPctOfPriceReduction_SingleFamilyResidence <dbl> NA, NA, …
## $ MedianPriceCutDollar_AllHomes <dbl> NA, NA, …
## $ MedianPriceCutDollar_CondoCoop <dbl> NA, NA, …
## $ MedianPriceCutDollar_SingleFamilyResidence <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_1Bedroom <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_2Bedroom <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_3Bedroom <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_4Bedroom <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_5BedroomOrMore <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_AllHomes <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_CondoCoop <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_DuplexTriplex <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_MultiFamilyResidence5PlusUnits <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_SingleFamilyResidence <dbl> NA, NA, …
## $ MedianRentalPricePerSqft_Studio <dbl> NA, NA, …
## $ MedianRentalPrice_1Bedroom <dbl> NA, NA, …
## $ MedianRentalPrice_2Bedroom <dbl> NA, NA, …
## $ MedianRentalPrice_3Bedroom <dbl> NA, NA, …
## $ MedianRentalPrice_4Bedroom <dbl> NA, NA, …
## $ MedianRentalPrice_5BedroomOrMore <dbl> NA, NA, …
## $ MedianRentalPrice_AllHomes <dbl> NA, NA, …
## $ MedianRentalPrice_CondoCoop <dbl> NA, NA, …
## $ MedianRentalPrice_DuplexTriplex <dbl> NA, NA, …
## $ MedianRentalPrice_MultiFamilyResidence5PlusUnits <dbl> NA, NA, …
## $ MedianRentalPrice_SingleFamilyResidence <dbl> NA, NA, …
## $ MedianRentalPrice_Studio <dbl> NA, NA, …
## $ ZHVIPerSqft_AllHomes <int> 50, 62, …
## $ PctOfHomesDecreasingInValues_AllHomes <dbl> NA, NA, …
## $ PctOfHomesIncreasingInValues_AllHomes <dbl> NA, NA, …
## $ PctOfHomesSellingForGain_AllHomes <dbl> NA, NA, …
## $ PctOfHomesSellingForLoss_AllHomes <dbl> NA, NA, …
## $ PctOfListingsWithPriceReductionsSeasAdj_AllHomes <dbl> NA, NA, …
## $ PctOfListingsWithPriceReductionsSeasAdj_CondoCoop <dbl> NA, NA, …
## $ PctOfListingsWithPriceReductionsSeasAdj_SingleFamilyResidence <dbl> NA, NA, …
## $ PctOfListingsWithPriceReductions_AllHomes <dbl> NA, NA, …
## $ PctOfListingsWithPriceReductions_CondoCoop <dbl> NA, NA, …
## $ PctOfListingsWithPriceReductions_SingleFamilyResidence <dbl> NA, NA, …
## $ PriceToRentRatio_AllHomes <dbl> NA, NA, …
## $ Sale_Counts <dbl> NA, NA, …
## $ Sale_Counts_Seas_Adj <dbl> NA, NA, …
## $ Sale_Prices <dbl> NA, NA, …
## $ ZHVI_1bedroom <int> 61500, 5…
## $ ZHVI_2bedroom <int> 48900, 8…
## $ ZHVI_3bedroom <int> 78200, 9…
## $ ZHVI_4bedroom <int> 146500, …
## $ ZHVI_5BedroomOrMore <int> 206300, …
## $ ZHVI_AllHomes <int> 79500, 1…
## $ ZHVI_BottomTier <int> 45600, 6…
## $ ZHVI_CondoCoop <int> 99500, 7…
## $ ZHVI_MiddleTier <int> 79500, 1…
## $ ZHVI_SingleFamilyResidence <int> 79000, 1…
## $ ZHVI_TopTier <int> 140200, …
## $ ZRI_AllHomes <int> NA, NA, …
## $ ZRI_AllHomesPlusMultifamily <int> NA, NA, …
## $ ZriPerSqft_AllHomes <dbl> NA, NA, …
## $ Zri_MultiFamilyResidenceRental <int> NA, NA, …
## $ Zri_SingleFamilyResidenceRental <int> NA, NA, …
#str(state_time_series)
#summary(state_time_series)
# Distribution of sale prices: box plot and histogram in a 1 x 2 layout.
par(mfrow = c(1, 2))
boxplot(
  state_time_series$Sale_Prices,
  main = "Sales Price of Homes",
  xlab = "Sales Price",
  ylab = "Price",
  outline = FALSE,
  horizontal = TRUE
)
hist(
  state_time_series$Sale_Prices,
  col = "blue",
  breaks = 100,
  main = "Sales Prices",
  xlab = "Sales Price of homes",
  ylab = "Amount of Homes Sold Per Price Range"
)
# Percentage series over time.
# The x axis labels below talk about years, so the values are plotted against
# the observation date instead of the row index that plot() uses when it is
# given a single vector.
obs_date <- as.Date(state_time_series$Date, format = "%Y-%m-%d")
# Scatter plot of homes selling for a loss.
plot(
  obs_date,
  state_time_series$PctOfHomesSellingForLoss_AllHomes,
  main = "Percent of Homes Sold For A Loss",
  xlab = "Years",
  ylab = "Percentage"
)
# Scatter plot of homes increasing in value.
plot(
  obs_date,
  state_time_series$PctOfHomesIncreasingInValues_AllHomes,
  main = "Increase Value of Homes",
  xlab = "From 1996 to 2017",
  ylab = "Percentage"
)
#colSums(is.na(state_time_series))
# Highest and lowest median listing price, ignoring missing values.
max(state_time_series$MedianListingPrice_AllHomes, na.rm = TRUE)
## [1] 610000
min(state_time_series$MedianListingPrice_AllHomes, na.rm = TRUE)
## [1] 112944
# Highest and lowest median rental price, ignoring missing values.
# (A single max() is enough; nesting max(max(...)) returned the same value.)
max(state_time_series$MedianRentalPrice_AllHomes, na.rm = TRUE)
## [1] 3600
min(state_time_series$MedianRentalPrice_AllHomes, na.rm = TRUE)
## [1] 750
#### Read in the dataset and quantify how much of it is missing
#head(state_time_series)
#tail(state_time_series)
#colSums(is.na(state_time_series))
# How many values are missing in ZHVI_AllHomes?
# NOTE(review): an earlier note here said 744, but 13212 total rows minus the
# 12438 rows kept after drop_na(ZHVI_AllHomes) gives 774 - confirm by running
# the line below.
#sum(is.na(state_time_series$ZHVI_AllHomes))
#### Let's view the missing values and see if we need to clean.
# which(is.na(state_time_series$ZHVI_AllHomes))
# view(state_time_series$ZHVI_AllHomes)
############################################
#print(state_time_series)
# Total number of cells = rows * columns of the data frame.
totalcells <- prod(dim(state_time_series))
print("Total number of cells")
## [1] "Total number of cells"
print(totalcells)
## [1] 1083384
# Number of cells that hold NA.
missingcells <- sum(is.na(state_time_series))
print("Missing value cells")
## [1] "Missing value cells"
print(missingcells)
## [1] 633838
# Share of missing cells, expressed as a percentage.
percent <- (missingcells * 100) / totalcells
print("Percentage of missing values cells")
## [1] "Percentage of missing values cells"
print(percent)
## [1] 58.50539
# Build the home-value table: date, region and the all-homes ZHVI only,
# without the rows where the index is missing.
zhvi_columns <- select(state_time_series, Date, RegionName, ZHVI_AllHomes)
zillow_state <- drop_na(zhvi_columns, ZHVI_AllHomes)
# Date arrives as text; parse it into a Date object.
zillow_state[["Date"]] <- as.Date(zillow_state[["Date"]], format = "%Y-%m-%d")
# Preview as a tibble (the result is printed, zillow_state is not reassigned).
as_tibble(zillow_state)
## # A tibble: 12,438 × 3
## Date RegionName ZHVI_AllHomes
## <date> <chr> <int>
## 1 1996-04-30 Alabama 79500
## 2 1996-04-30 Arizona 103600
## 3 1996-04-30 Arkansas 64400
## 4 1996-04-30 California 157900
## 5 1996-04-30 Colorado 128100
## 6 1996-04-30 Connecticut 132000
## 7 1996-04-30 Delaware 106800
## 8 1996-04-30 Florida 86300
## 9 1996-04-30 Georgia 92000
## 10 1996-04-30 Hawaii 227400
## # ℹ 12,428 more rows
# Check which class the object has (is it a data frame?).
#view(zillow_state)
class(zillow_state)
## [1] "data.frame"
# All the distinct region names present in the data set.
unique(zillow_state[["RegionName"]])
## [1] "Alabama" "Arizona" "Arkansas"
## [4] "California" "Colorado" "Connecticut"
## [7] "Delaware" "Florida" "Georgia"
## [10] "Hawaii" "Idaho" "Illinois"
## [13] "Indiana" "Iowa" "Kentucky"
## [16] "Maine" "Maryland" "Massachusetts"
## [19] "Michigan" "Minnesota" "Mississippi"
## [22] "Missouri" "Nebraska" "Nevada"
## [25] "NewHampshire" "NewJersey" "NewMexico"
## [28] "NorthCarolina" "Ohio" "Oklahoma"
## [31] "Oregon" "Pennsylvania" "RhodeIsland"
## [34] "SouthCarolina" "Tennessee" "Utah"
## [37] "Virginia" "Washington" "WestVirginia"
## [40] "SouthDakota" "Texas" "Wisconsin"
## [43] "Montana" "Wyoming" "Alaska"
## [46] "DistrictofColumbia" "Vermont" "NorthDakota"
## [49] "NewYork" "Kansas"
# Regions that make up the DC / Maryland / Virginia (DMV) selection.
dmv_regions <- c("Maryland", "Virginia", "DistrictofColumbia")
dmv <- filter(zillow_state, RegionName %in% dmv_regions)
# One data frame per region, taken from the DMV subset.
md <- filter(dmv, RegionName == "Maryland")            # Maryland
va <- filter(dmv, RegionName == "Virginia")            # Virginia
dc <- filter(dmv, RegionName == "DistrictofColumbia")  # District of Columbia
# Convert the ZHVI values into monthly time series objects.
# ts() documents `start` as a single number or a vector of two numbers
# (year, period within the year); a third "day" element is not used, so the
# start is written as c(year, month). frequency = 12 because the prices are
# reported every month and there are 12 months in a year.
# NOTE(review): zillow_state holds several regions per date, so this combined
# series is a stack of all regions rather than one monthly series - confirm
# that this is the intended input for the overview plot.
zill_home_value_index <- ts(
  zillow_state$ZHVI_AllHomes,
  start = c(1996, 4),
  frequency = 12
)
# Per-region series for Maryland, Virginia and DC. The names md, va and dc are
# reused: from here on they hold ts objects instead of data frames.
md <- ts(md$ZHVI_AllHomes, start = c(1996, 4), frequency = 12)
va <- ts(va$ZHVI_AllHomes, start = c(1996, 4), frequency = 12)
# DC values start later: data collection began in 1999.
dc <- ts(dc$ZHVI_AllHomes, start = c(1999, 6), frequency = 12)
# ts_plot() views: the combined series first, then one per region.
ts_plot(zill_home_value_index)
ts_plot(
  md,
  Xtitle = "Years",
  Ytitle = "Estimated Home Prices",
  title = "Zillow Home Value Index Maryland"
)
ts_plot(
  va,
  Xtitle = "Years",
  Ytitle = "Estimated Home Prices",
  title = "Zillow Home Value Index Virginia"
)
ts_plot(
  dc,
  Xtitle = "Years",
  Ytitle = "Estimated Home Prices",
  title = "Zillow Home Value Index D.C"
)
# Base-graphics version: Maryland, Virginia and DC next to each other.
par(mfrow = c(1, 3))
dmv_panels <- list(
  list(series = md, heading = "Maryland's Home Value"),
  list(series = va, heading = "Virginia's Home Value"),
  list(series = dc, heading = "DC's Home Value")
)
for (panel in dmv_panels) {
  plot(panel$series, main = panel$heading, xlab = "Time", ylab = "House Prices")
}
# Build zillow_state_rental: the rental-price columns, keeping only the rows
# in which every selected column has a value.
# NOTE(review): every column is a MedianRentalPrice_* field except the
# duplex/triplex one, which is the per-square-foot variant
# (MedianRentalPricePerSqft_DuplexTriplex). It also takes part in na.omit(),
# so it influences which rows survive - confirm that this is intended.
zillow_state_rental <- state_time_series %>%
  select(
    Date,
    RegionName,
    MedianRentalPrice_1Bedroom,
    MedianRentalPrice_2Bedroom,
    MedianRentalPrice_3Bedroom,
    MedianRentalPrice_4Bedroom,
    MedianRentalPrice_5BedroomOrMore,
    MedianRentalPrice_CondoCoop,
    MedianRentalPricePerSqft_DuplexTriplex,
    MedianRentalPrice_MultiFamilyResidence5PlusUnits,
    MedianRentalPrice_SingleFamilyResidence,
    MedianRentalPrice_Studio,
    MedianRentalPrice_AllHomes
  ) %>%
  na.omit()
#zillow_state_rental %>%
# na.omit
# Inspect the structure of the resulting data set.
str(zillow_state_rental)
## 'data.frame': 671 obs. of 13 variables:
## $ Date : chr "2011-07-31" "2011-08-31" "2011-09-30" "2011-09-30" ...
## $ RegionName : chr "Florida" "Florida" "Florida" "NorthCarolina" ...
## $ MedianRentalPrice_1Bedroom : num 1100 1100 1100 695 1000 ...
## $ MedianRentalPrice_2Bedroom : num 1300 1295 1300 750 1250 ...
## $ MedianRentalPrice_3Bedroom : num 1500 1500 1500 999 1500 ...
## $ MedianRentalPrice_4Bedroom : num 2000 2000 1995 1395 1900 ...
## $ MedianRentalPrice_5BedroomOrMore : num 3000 3000 3000 1850 2995 ...
## $ MedianRentalPrice_CondoCoop : num 1400 1400 1400 850 1400 850 1400 850 1400 850 ...
## $ MedianRentalPricePerSqft_DuplexTriplex : num 0.868 0.868 0.876 0.656 0.877 ...
## $ MedianRentalPrice_MultiFamilyResidence5PlusUnits: num 1344 1325 1350 775 1300 ...
## $ MedianRentalPrice_SingleFamilyResidence : num 1400 1400 1400 1050 1400 1000 1400 1000 1400 1000 ...
## $ MedianRentalPrice_Studio : num 1300 1300 1300 1150 1300 ...
## $ MedianRentalPrice_AllHomes : num 1400 1400 1400 1000 1400 999 1400 995 1400 995 ...
## - attr(*, "na.action")= 'omit' Named int [1:12541] 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "names")= chr [1:12541] "1" "2" "3" "4" ...
# Per the original note: rental data collection started in 2010.
#view(zillow_state_rental)
# Axis labels, in the same order as the columns handed to boxplot().
label <- c("1 Bed", "2 Bed", "3 Bed", "4 Bed", "5 Bed", "Condo", "Family", "Studio")
# One box per home type, outliers hidden.
with(
  zillow_state_rental,
  boxplot(
    MedianRentalPrice_1Bedroom,
    MedianRentalPrice_2Bedroom,
    MedianRentalPrice_3Bedroom,
    MedianRentalPrice_4Bedroom,
    MedianRentalPrice_5BedroomOrMore,
    MedianRentalPrice_CondoCoop,
    MedianRentalPrice_SingleFamilyResidence,
    MedianRentalPrice_Studio,
    names = label,
    outline = FALSE,
    main = "Rental Prices of All Homes in US"
  )
)
#zillow_state_rental
# Rental-price columns for the selected regions, complete rows only.
dmv_rental <- state_time_series %>%
  select(
    Date,
    RegionName,
    MedianRentalPrice_1Bedroom,
    MedianRentalPrice_2Bedroom,
    MedianRentalPrice_3Bedroom,
    MedianRentalPrice_4Bedroom,
    MedianRentalPrice_5BedroomOrMore,
    MedianRentalPrice_CondoCoop,
    MedianRentalPrice_SingleFamilyResidence,
    MedianRentalPrice_Studio,
    MedianRentalPrice_AllHomes
  ) %>%
  filter(RegionName %in% c("Maryland", "Virginia", "DistrictofColumbia", "UnitedStates")) %>%
  na.omit()
#view(dmv_rental)
# Virginia rental prices by home type.
va_rental <- filter(dmv_rental, RegionName == "Virginia")
#view(va_rental)
with(
  va_rental,
  boxplot(
    MedianRentalPrice_1Bedroom,
    MedianRentalPrice_2Bedroom,
    MedianRentalPrice_3Bedroom,
    MedianRentalPrice_4Bedroom,
    MedianRentalPrice_5BedroomOrMore,
    MedianRentalPrice_CondoCoop,
    MedianRentalPrice_SingleFamilyResidence,
    MedianRentalPrice_Studio,
    names = label,
    outline = FALSE,
    main = "Rental Prices In Virginia",
    xlab = "Types of Homes",
    ylab = "Rental Prices"
  )
)
# Maryland rental prices by home type (same layout as the Virginia plot).
md_rental <- filter(dmv_rental, RegionName == "Maryland")
with(
  md_rental,
  boxplot(
    MedianRentalPrice_1Bedroom,
    MedianRentalPrice_2Bedroom,
    MedianRentalPrice_3Bedroom,
    MedianRentalPrice_4Bedroom,
    MedianRentalPrice_5BedroomOrMore,
    MedianRentalPrice_CondoCoop,
    MedianRentalPrice_SingleFamilyResidence,
    MedianRentalPrice_Studio,
    names = label,
    outline = FALSE,
    main = "Rental Prices In Maryland",
    xlab = "Types of Homes",
    ylab = "Rental Prices"
  )
)
# DC rental prices, taken from the raw table (no NA filtering here).
dc_zillow <- filter(state_time_series, RegionName == "DistrictofColumbia")
#view(dc_zillow)
# Labels for the DC plot: the 5-bedroom column is not plotted for DC.
label_dc <- c("1 Bed", "2 Bed", "3 Bed", "4 Bed", "Condo", "Family", "Studio")
with(
  dc_zillow,
  boxplot(
    MedianRentalPrice_1Bedroom,
    MedianRentalPrice_2Bedroom,
    MedianRentalPrice_3Bedroom,
    MedianRentalPrice_4Bedroom,
    MedianRentalPrice_CondoCoop,
    MedianRentalPrice_SingleFamilyResidence,
    MedianRentalPrice_Studio,
    names = label_dc,
    outline = FALSE,
    ylim = c(0, 5000),
    main = "Rental Prices in Washington DC",
    xlab = "Type of Homes",
    ylab = "Rental Prices"
  )
)
# Two-sample t-test of median rental price: Maryland vs California.
md_vs_ca <- filter(state_time_series, RegionName %in% c("Maryland", "California"))
t.test(
  MedianRentalPrice_AllHomes ~ RegionName,
  data = md_vs_ca,
  alternative = "two.sided"
)
##
## Welch Two Sample t-test
##
## data: MedianRentalPrice_AllHomes by RegionName
## t = 15.936, df = 101.15, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group California and group Maryland is not equal to 0
## 95 percent confidence interval:
## 435.9071 559.8594
## sample estimates:
## mean in group California mean in group Maryland
## 2147.404 1649.521
# Two-sample t-test of median rental price: Virginia vs California.
va_vs_ca <- filter(state_time_series, RegionName %in% c("Virginia", "California"))
t.test(
  MedianRentalPrice_AllHomes ~ RegionName,
  data = va_vs_ca,
  alternative = "two.sided"
)
##
## Welch Two Sample t-test
##
## data: MedianRentalPrice_AllHomes by RegionName
## t = 19.858, df = 112.16, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group California and group Virginia is not equal to 0
## 95 percent confidence interval:
## 574.3650 701.6857
## sample estimates:
## mean in group California mean in group Virginia
## 2147.404 1509.379
# Per-state slices of the raw table, used by the distribution plots.
cali_rental <- filter(state_time_series, RegionName == "California")
maryland_rental <- filter(state_time_series, RegionName == "Maryland")
virginia_rental <- filter(state_time_series, RegionName == "Virginia")
## Visualization of the distribution of the samples
# Each 2 x 2 page pairs a scatter plot (value against row position within the
# state's subset) with a horizontal box plot; California is the reference.
# Page 1: California vs Maryland.
par(mfrow = c(2, 2))
plot(
  cali_rental$MedianRentalPrice_AllHomes,
  xlab = "From 1996 To 2017",
  ylab = "Median Rental Prices",
  main = "California Rental Prices",
  xlim = c(160, 265)
)
boxplot(
  cali_rental$MedianRentalPrice_AllHomes,
  xlab = "California Rental Prices",
  ylab = "Rental Prices",
  main = "California",
  horizontal = TRUE,
  outline = FALSE
)
plot(
  maryland_rental$MedianRentalPrice_AllHomes,
  xlab = "From 1996 To 2017",
  ylab = "Median Rental Prices",
  main = "Maryland Rental Prices",
  xlim = c(160, 265)
)
boxplot(
  maryland_rental$MedianRentalPrice_AllHomes,
  xlab = "Maryland Rental Prices",
  ylab = "Rental Prices",
  main = "Maryland",
  horizontal = TRUE,
  outline = FALSE
)
# Page 2: California vs Virginia (scatter plot and box plot of median rent).
par(mfrow = c(2, 2))
plot(
  cali_rental$MedianRentalPrice_AllHomes,
  xlab = "From 1996 To 2017",
  ylab = "Median Rental Prices",
  main = "California Rental Prices",
  xlim = c(160, 265)
)
boxplot(
  cali_rental$MedianRentalPrice_AllHomes,
  xlab = "California Rental Prices",
  ylab = "Rental Prices",
  main = "California",
  horizontal = TRUE,
  outline = FALSE
)
# Titles below spell the state name "Virginia" (was misspelled "Virgina").
plot(
  virginia_rental$MedianRentalPrice_AllHomes,
  xlab = "From 1996 To 2017",
  ylab = "Median Rental Prices",
  main = "Virginia Rental Prices",
  xlim = c(160, 270)
)
boxplot(
  virginia_rental$MedianRentalPrice_AllHomes,
  xlab = "Virginia",
  ylab = "Rental Prices",
  main = "Virginia Median Rental Prices",
  horizontal = TRUE,
  outline = FALSE
)
# Median rental price against sale price, with a least-squares line on top.
# x and y are kept as plain vectors so that the same pair feeds both the
# scatter plot and the fitted line.
x <- state_time_series$Sale_Prices
y <- state_time_series$MedianRentalPrice_AllHomes
# plot() takes the vectors directly: it has no `data` argument (passing one
# only triggers '"data" is not a graphical parameter' warnings).
plot(
  x, y,
  main = "Median Rental Price vs Sales prices 1996-2017",
  xlab = "Sale Price of Homes",
  ylab = "Median Rental Price"
)
# Base graphics are layered by calling the next drawing function, not with
# `+`; abline() adds the regression line to the plot that is already open.
abline(lm(y ~ x), col = "red")
# Simple linear regression of median rental price on sale price; rows with a
# missing value in either column are left out by lm().
regression <- lm(
  formula = MedianRentalPrice_AllHomes ~ Sale_Prices,
  data = state_time_series
)
# Coefficients, residual summary and fit statistics.
summary(regression)
##
## Call:
## lm(formula = MedianRentalPrice_AllHomes ~ Sale_Prices, data = state_time_series)
##
## Residuals:
## Min 1Q Median 3Q Max
## -677.25 -172.02 -46.48 93.42 1803.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.943e+02 1.513e+01 32.66 <2e-16 ***
## Sale_Prices 4.549e-03 7.043e-05 64.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 306.1 on 3049 degrees of freedom
## (10161 observations deleted due to missingness)
## Multiple R-squared: 0.5777, Adjusted R-squared: 0.5776
## F-statistic: 4171 on 1 and 3049 DF, p-value: < 2.2e-16
# Therefore, as the sale price of a house increases, the rental price tends to
# increase as well.
# Correlation between sale price and median rent, computed on the rows where
# both values are present (use = "complete.obs").
cor(state_time_series$Sale_Prices, state_time_series$MedianRentalPrice_AllHomes, use = "complete.obs")
## [1] 0.7600696
# One-sided test: is the correlation greater than zero?
cor.test(state_time_series$Sale_Prices, state_time_series$MedianRentalPrice_AllHomes, alternative = "greater")
##
## Pearson's product-moment correlation
##
## data: state_time_series$Sale_Prices and state_time_series$MedianRentalPrice_AllHomes
## t = 64.584, df = 3049, p-value < 2.2e-16
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
## 0.7472004 1.0000000
## sample estimates:
## cor
## 0.7600696
# This shows that rent tends to increase as the sale price increases.
# observed correlation coefficient of two vectors x and y