# Load the HomesForSale listings from the Lock5 site; every later analysis
# reads from `home`.
home <- read.csv(
  file = "https://www.lock5stat.com/datasets3e/HomesForSale.csv"
)

# California-only rows, used by the single-state regressions.
# which() drops rows where the comparison is NA, matching subset().
ca <- home[which(home$State == "CA"), ]
Using only the California data: how much does the size of a home influence its price?
# Simple linear regression of Price on Size for the California homes.
# Only the two columns the model needs are copied into the working frame.
df1 <- with(ca, data.frame(Size = Size, Price = Price))
model1 <- lm(formula = Price ~ Size, data = df1)

# Coefficient table, residual summary, R-squared and overall F-test.
summary(model1)
##
## Call:
## lm(formula = Price ~ Size, data = df1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -462.55 -139.69 39.24 147.65 352.21
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -56.81675 154.68102 -0.367 0.716145
## Size 0.33919 0.08558 3.963 0.000463 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 219.3 on 28 degrees of freedom
## Multiple R-squared: 0.3594, Adjusted R-squared: 0.3365
## F-statistic: 15.71 on 1 and 28 DF, p-value: 0.0004634
Using only the California data: how does the number of bedrooms of a home influence its price?
# Simple linear regression of Price on Beds for the California homes.
# Only the two columns the model needs are copied into the working frame.
df2 <- with(ca, data.frame(Beds = Beds, Price = Price))
model2 <- lm(formula = Price ~ Beds, data = df2)

# Coefficient table, residual summary, R-squared and overall F-test.
summary(model2)
##
## Call:
## lm(formula = Price ~ Beds, data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -413.83 -236.62 29.94 197.69 570.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 269.76 233.62 1.155 0.258
## Beds 84.77 72.91 1.163 0.255
##
## Residual standard error: 267.6 on 28 degrees of freedom
## Multiple R-squared: 0.04605, Adjusted R-squared: 0.01198
## F-statistic: 1.352 on 1 and 28 DF, p-value: 0.2548
Using only the California data: how does the number of bathrooms of a home influence its price?
# Simple linear regression of Price on Baths for the California homes.
# Only the two columns the model needs are copied into the working frame.
df3 <- with(ca, data.frame(Baths = Baths, Price = Price))
model3 <- lm(formula = Price ~ Baths, data = df3)

# Coefficient table, residual summary, R-squared and overall F-test.
summary(model3)
##
## Call:
## lm(formula = Price ~ Baths, data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -374.93 -181.56 -2.74 152.31 614.81
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90.71 148.57 0.611 0.54641
## Baths 194.74 62.28 3.127 0.00409 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 235.8 on 28 degrees of freedom
## Multiple R-squared: 0.2588, Adjusted R-squared: 0.2324
## F-statistic: 9.779 on 1 and 28 DF, p-value: 0.004092
Using only the California data: how do the size, the number of bedrooms, and the number of bathrooms of a home jointly influence its price?
# Multiple linear regression of Price on Size, Beds and Baths together for
# the California homes. The working frame holds the three predictors plus
# the response, in the same column order as before.
df4 <- with(
  ca,
  data.frame(Size = Size, Beds = Beds, Baths = Baths, Price = Price)
)
model4 <- lm(formula = Price ~ Size + Beds + Baths, data = df4)

# Per-predictor t-tests plus the overall F-test for the joint model.
summary(model4)
##
## Call:
## lm(formula = Price ~ Size + Beds + Baths, data = df4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -415.47 -130.32 19.64 154.79 384.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -41.5608 210.3809 -0.198 0.8449
## Size 0.2811 0.1189 2.364 0.0259 *
## Beds -33.7036 67.9255 -0.496 0.6239
## Baths 83.9844 76.7530 1.094 0.2839
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 221.8 on 26 degrees of freedom
## Multiple R-squared: 0.3912, Adjusted R-squared: 0.3209
## F-statistic: 5.568 on 3 and 26 DF, p-value: 0.004353
Are there significant differences in home prices among the four states (CA, NY, NJ, PA)? This will help you determine whether the state in which a home is located has a significant impact on its price. Use all of the data.
# One-way ANOVA of Price across the four states.
# Keep only rows whose State is one of the four codes of interest; %in% never
# yields NA, so plain logical indexing selects the same rows as subset().
states <- home[home$State %in% c("CA", "NY", "NJ", "PA"), ]
df5 <- with(states, data.frame(State = State, Price = Price))
anova_model <- aov(formula = Price ~ State, data = df5)

# ANOVA table: between-state vs. residual sums of squares and the F-test.
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## State 3 1198169 399390 7.355 0.000148 ***
## Residuals 116 6299266 54304
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1