library(lessR)
##
## lessR 4.4.1 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:base':
##
## sort_by
# Load the Lock5 "HomesForSale" data set directly from its public CSV URL.
# Assignment uses `<-`, consistent with the other assignment in this script.
home <- Read("https://www.lock5stat.com/datasets3e/HomesForSale.csv")
##
## >>> Suggestions
## Recommended binary format for data files: feather
## Create with Write(d, "your_file", format="feather")
## To read a csv or Excel file of variable labels, var_labels=TRUE
## Each row of the file: Variable Name, Variable Label
## Read into a data frame named l (the letter el)
##
## More details about your data, Enter: details() for d, or details(name)
##
## Data Types
## ------------------------------------------------------------
## character: Non-numeric data values
## integer: Numeric data values, integers only
## double: Numeric data values with decimal digits
## ------------------------------------------------------------
##
## Variable Missing Unique
## Name Type Values Values Values First and last values
## ------------------------------------------------------------------------------------------
## 1 State character 120 0 4 CA CA CA ... PA PA PA
## 2 Price integer 120 0 98 533 610 899 ... 90 228 109
## 3 Size integer 120 0 111 1589 2008 2380 ... 1768 1732 1770
## 4 Beds integer 120 0 6 3 3 5 ... 3 3 3
## 5 Baths double 120 0 8 2.5 2 3 ... 2 4 2
## ------------------------------------------------------------------------------------------
# Preview the first six rows of the full data set.
head(home, n = 6L)
## State Price Size Beds Baths
## 1 CA 533 1589 3 2.5
## 2 CA 610 2008 3 2.0
## 3 CA 899 2380 5 3.0
## 4 CA 929 1868 3 3.0
## 5 CA 210 1360 2 2.0
## 6 CA 268 2131 3 2.0
This report explores how various features of homes (size, number of bedrooms, number of bathrooms) relate to their price using regression models. Additionally, it investigates whether home prices significantly differ by state.
All data was gathered from https://www.lock5stat.com/datasets3e/HomesForSale.csv
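I will analyze the data to answer the following questions: (1) How does the size of a home relate to its price in California? (2) How does the number of bedrooms relate to price in California? (3) How does the number of bathrooms relate to price in California? (4) How do size, bedrooms, and bathrooms jointly relate to price in California? (5) Do home prices differ significantly among the four states (CA, NJ, NY, PA)?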
# Keep only the California listings; which() drops any NA comparisons,
# so the selected rows are the same ones subset() would return.
home_CA <- home[which(home$State == "CA"), ]

# Model 1 -- simple linear regression of Price on Size (California only).
Regression(my_formula = Price ~ Size, data = home_CA)
## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## Regression(my_formula=Price ~ Size, data=home_CA, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: home_CA
##
## Response Variable: Price
## Predictor Variable: Size
##
## Number of cases (rows) of data: 30
## Number of cases retained for analysis: 30
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) -56.817 154.681 -0.367 0.716 -373.666 260.033
## Size 0.339 0.086 3.963 0.000 0.164 0.514
##
## Standard deviation of Price: 269.1774
##
## Standard deviation of residuals: 219.2566 for df=28
## 95% range of residuals: 898.2537 = 2 * (2.048 * 219.2566)
##
## R-squared: 0.359 Adjusted R-squared: 0.337 PRESS R-squared: 0.273
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 15.709 df: 1 and 28 p-value: 0.000
##
## -- Analysis of Variance
##
## df Sum Sq Mean Sq F-value p-value
## Model 1 755179.865 755179.865 15.709 0.000
## Residuals 28 1346057.102 48073.468
## Price 29 2101236.967 72456.447
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## Price Size
## Price 1.00 0.60
## Size 0.60 1.00
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 20, out of 30 rows of data, or do n_res_rows="all"]
## -----------------------------------------------------------
## Size Price fitted resid rstdnt dffits cooks
## 21 2224 235 697.546 -462.546 -2.356 -0.637 0.175
## 7 2436 1095 769.454 325.546 1.615 0.556 0.146
## 6 2131 268 666.001 -398.001 -1.961 -0.477 0.103
## 12 2286 469 718.575 -249.575 -1.194 -0.347 0.059
## 10 1371 700 408.215 291.785 1.391 0.335 0.054
## 8 1375 699 409.572 289.428 1.379 0.330 0.053
## 4 1868 929 576.793 352.207 1.689 0.325 0.049
## 11 1440 145 431.619 -286.619 -1.360 -0.304 0.045
## 24 1688 195 515.739 -320.739 -1.523 -0.285 0.039
## 18 2479 929 784.039 144.961 0.696 0.251 0.032
## 3 2380 899 750.459 148.541 0.706 0.228 0.026
## 23 972 148 272.878 -124.878 -0.602 -0.227 0.026
## 5 1360 210 404.484 -194.484 -0.910 -0.222 0.025
## 17 1440 640 431.619 208.381 0.973 0.217 0.024
## 20 1309 559 387.185 171.815 0.804 0.207 0.022
## 25 1431 619 428.567 190.433 0.887 0.200 0.020
## 29 1828 368 563.226 -195.226 -0.903 -0.170 0.015
## 16 1419 285 424.496 -139.496 -0.646 -0.148 0.011
## 28 2340 835 736.892 98.108 0.462 0.143 0.010
## 15 1464 300 439.760 -139.760 -0.646 -0.141 0.010
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## [to see all intervals add n_pred_rows="all"]
## ----------------------------------------------
##
## Size Price pred s_pred pi.lwr pi.upr width
## 22 834 180 226.069 236.148 -257.658 709.796 967.454
## 23 972 148 272.878 232.512 -203.402 749.158 952.560
## ...
## 1 1589 533 482.159 223.285 24.780 939.537 914.757
## 24 1688 195 515.739 222.936 59.075 972.402 913.327
## 29 1828 368 563.226 222.992 106.448 1020.004 913.556
## ...
## 7 2436 1095 769.454 230.574 297.145 1241.763 944.617
## 14 2474 841 782.343 231.428 308.285 1256.402 948.118
## 18 2479 929 784.039 231.544 309.744 1258.335 948.591
##
## ----------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## ----------------------------------
# Scatter plot of Price (y-axis) against Size (x-axis) with the fitted
# least-squares line. lessR's Plot() takes the x-axis variable first, so the
# predictor Size is listed first and the response Price second, matching the
# Price ~ Size regression; fit="lm" draws the regression line that the
# original call omitted.
Plot(Size, Price, data=home_CA, color="blue", fit="lm")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Price, Size, enhance=TRUE) # many options
## Plot(Price, Size, fill="skyblue") # interior fill color of points
## Plot(Price, Size, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Price, Size, MD_cut=6) # Mahalanobis distance from center > 6 is an outlier
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 30
## Sample Correlation of Price and Size: r = 0.599
##
## Hypothesis Test of 0 Correlation: t = 3.963, df = 28, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.305 to 0.789
##
# Model 2 -- simple linear regression of Price on number of bedrooms
# (California only).
Regression(my_formula = Price ~ Beds, data = home_CA)
## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## Regression(my_formula=Price ~ Beds, data=home_CA, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: home_CA
##
## Response Variable: Price
## Predictor Variable: Beds
##
## Number of cases (rows) of data: 30
## Number of cases retained for analysis: 30
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) 269.762 233.618 1.155 0.258 -208.782 748.306
## Beds 84.767 72.911 1.163 0.255 -64.583 234.118
##
## Standard deviation of Price: 269.1774
##
## Standard deviation of residuals: 267.5599 for df=28
## 95% range of residuals: 1,096.143 = 2 * (2.048 * 267.5599)
##
## R-squared: 0.046 Adjusted R-squared: 0.012 PRESS R-squared: -0.128
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 1.352 df: 1 and 28 p-value: 0.255
##
## -- Analysis of Variance
##
## df Sum Sq Mean Sq F-value p-value
## Model 1 96764.729 96764.729 1.352 0.255
## Residuals 28 2004472.238 71588.294
## Price 29 2101236.967 72456.447
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## Price Beds
## Price 1.00 0.21
## Beds 0.21 1.00
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 20, out of 30 rows of data, or do n_res_rows="all"]
## --------------------------------------------------------
## Beds Price fitted resid rstdnt dffits cooks
## 29 5 368 693.599 -325.599 -1.477 -0.948 0.432
## 3 5 899 693.599 205.401 0.910 0.584 0.172
## 24 4 195 608.832 -413.832 -1.672 -0.523 0.128
## 7 3 1095 524.064 570.936 2.339 0.443 0.085
## 8 2 699 439.297 259.703 1.041 0.400 0.080
## 22 2 180 439.297 -259.297 -1.040 -0.400 0.080
## 18 4 929 608.832 320.168 1.267 0.396 0.077
## 5 2 210 439.297 -229.297 -0.915 -0.352 0.062
## 4 3 929 524.064 404.936 1.581 0.300 0.043
## 11 3 145 524.064 -379.064 -1.472 -0.279 0.037
## 23 3 148 524.064 -376.064 -1.459 -0.276 0.037
## 14 3 841 524.064 316.936 1.216 0.230 0.026
## 28 3 835 524.064 310.936 1.192 0.226 0.025
## 21 3 235 524.064 -289.064 -1.104 -0.209 0.022
## 6 3 268 524.064 -256.064 -0.973 -0.184 0.017
## 16 3 285 524.064 -239.064 -0.906 -0.172 0.015
## 15 3 300 524.064 -224.064 -0.848 -0.161 0.013
## 9 3 729 524.064 204.936 0.774 0.147 0.011
## 19 4 714 608.832 105.168 0.406 0.127 0.008
## 10 3 700 524.064 175.936 0.663 0.126 0.008
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## [to see all intervals add n_pred_rows="all"]
## ----------------------------------------------
##
## Beds Price pred s_pred pi.lwr pi.upr width
## 5 2 210 439.297 284.258 -142.979 1021.573 1164.552
## 8 2 699 439.297 284.258 -142.979 1021.573 1164.552
## ...
## 22 2 180 439.297 284.258 -142.979 1021.573 1164.552
## ...
## 22.1 2 180 439.297 284.258 -142.979 1021.573 1164.552
## 1 3 533 524.064 272.156 -33.423 1081.551 1114.974
## 2 3 610 524.064 272.156 -33.423 1081.551 1114.974
## ...
## 24 4 195 608.832 279.227 36.862 1180.801 1143.939
## 3 5 899 693.599 304.134 70.608 1316.590 1245.982
## 29 5 368 693.599 304.134 70.608 1316.590 1245.982
##
## ----------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## ----------------------------------
# Scatter plot of Price (y-axis) against Beds (x-axis). lessR's Plot() takes
# the x-axis variable first, so the predictor Beds is listed first and the
# response Price second, matching the Price ~ Beds regression.
Plot(Beds, Price, data=home_CA, color="green")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Price, Beds, enhance=TRUE) # many options
## Plot(Price, Beds, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Price, Beds, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 30
## Sample Correlation of Price and Beds: r = 0.215
##
## Hypothesis Test of 0 Correlation: t = 1.163, df = 28, p-value = 0.255
## 95% Confidence Interval for Correlation: -0.158 to 0.534
##
# Model 3 -- simple linear regression of Price on number of bathrooms
# (California only).
Regression(my_formula = Price ~ Baths, data = home_CA)
## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## Regression(my_formula=Price ~ Baths, data=home_CA, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: home_CA
##
## Response Variable: Price
## Predictor Variable: Baths
##
## Number of cases (rows) of data: 30
## Number of cases retained for analysis: 30
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) 90.712 148.571 0.611 0.546 -213.622 395.047
## Baths 194.739 62.275 3.127 0.004 67.174 322.304
##
## Standard deviation of Price: 269.1774
##
## Standard deviation of residuals: 235.8384 for df=28
## 95% range of residuals: 966.1862 = 2 * (2.048 * 235.8384)
##
## R-squared: 0.259 Adjusted R-squared: 0.232 PRESS R-squared: 0.149
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 9.779 df: 1 and 28 p-value: 0.004
##
## -- Analysis of Variance
##
## df Sum Sq Mean Sq F-value p-value
## Model 1 543883.601 543883.601 9.779 0.004
## Residuals 28 1557353.365 55619.763
## Price 29 2101236.967 72456.447
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## Price Baths
## Price 1.00 0.51
## Baths 0.51 1.00
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 20, out of 30 rows of data, or do n_res_rows="all"]
## --------------------------------------------------------
## Baths Price fitted resid rstdnt dffits cooks
## 8 1.000 699 285.451 413.549 1.999 0.834 0.314
## 7 2.000 1095 480.191 614.809 3.020 0.608 0.143
## 15 3.000 300 674.930 -374.930 -1.703 -0.464 0.101
## 9 4.000 729 869.669 -140.669 -0.677 -0.379 0.073
## 4 3.000 929 674.930 254.070 1.122 0.306 0.046
## 18 3.000 929 674.930 254.070 1.122 0.306 0.046
## 11 2.000 145 480.191 -335.191 -1.480 -0.298 0.043
## 3 3.000 899 674.930 224.070 0.984 0.268 0.036
## 23 1.000 148 285.451 -137.451 -0.625 -0.260 0.035
## 24 2.000 195 480.191 -285.191 -1.246 -0.251 0.031
## 12 3.000 469 674.930 -205.930 -0.902 -0.246 0.030
## 5 2.000 210 480.191 -270.191 -1.177 -0.237 0.028
## 21 2.000 235 480.191 -245.191 -1.063 -0.214 0.023
## 22 1.000 180 285.451 -105.451 -0.478 -0.199 0.020
## 28 3.000 835 674.930 160.070 0.697 0.190 0.018
## 10 2.000 700 480.191 219.809 0.949 0.191 0.018
## 6 2.000 268 480.191 -212.191 -0.915 -0.184 0.017
## 16 2.000 285 480.191 -195.191 -0.840 -0.169 0.014
## 17 2.000 640 480.191 159.809 0.685 0.138 0.010
## 14 3.500 841 772.299 68.701 0.308 0.123 0.008
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## [to see all intervals add n_pred_rows="all"]
## ----------------------------------------------
##
## Baths Price pred s_pred pi.lwr pi.upr width
## 8 1.000 699 285.451 252.707 -232.196 803.099 1035.294
## 22 1.000 180 285.451 252.707 -232.196 803.099 1035.294
## 23 1.000 148 285.451 252.707 -232.196 803.099 1035.294
## ...
## 30 2.000 360 480.191 240.385 -12.216 972.598 984.814
## 1 2.500 533 577.560 240.116 85.704 1069.416 983.712
## 25 2.500 619 577.560 240.116 85.704 1069.416 983.712
## ...
## 14 3.500 841 772.299 251.425 257.278 1287.320 1030.042
## 9 4.000 729 869.669 262.493 331.976 1407.361 1075.385
##
## ----------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## ----------------------------------
# Scatter plot of Price (y-axis) against Baths (x-axis). lessR's Plot() takes
# the x-axis variable first, so the predictor Baths is listed first and the
# response Price second, matching the Price ~ Baths regression.
Plot(Baths, Price, data=home_CA, color="purple")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Price, Baths, enhance=TRUE) # many options
## Plot(Price, Baths, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Price, Baths, MD_cut=6) # Mahalanobis distance from center > 6 is an outlier
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 30
## Sample Correlation of Price and Baths: r = 0.509
##
## Hypothesis Test of 0 Correlation: t = 3.127, df = 28, p-value = 0.004
## 95% Confidence Interval for Correlation: 0.182 to 0.734
##
# Model 4 -- multiple linear regression of Price on Size, Beds, and Baths
# together (California only).
Regression(my_formula = Price ~ Size + Beds + Baths, data = home_CA)
## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## Regression(my_formula=Price ~ Size + Beds + Baths, data=home_CA, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: home_CA
##
## Response Variable: Price
## Predictor Variable 1: Size
## Predictor Variable 2: Beds
## Predictor Variable 3: Baths
##
## Number of cases (rows) of data: 30
## Number of cases retained for analysis: 30
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) -41.561 210.381 -0.198 0.845 -474.005 390.883
## Size 0.281 0.119 2.364 0.026 0.037 0.526
## Beds -33.704 67.926 -0.496 0.624 -173.326 105.919
## Baths 83.984 76.753 1.094 0.284 -73.784 241.752
##
## Standard deviation of Price: 269.1774
##
## Standard deviation of residuals: 221.8196 for df=26
## 95% range of residuals: 911.9134 = 2 * (2.056 * 221.8196)
##
## R-squared: 0.391 Adjusted R-squared: 0.321 PRESS R-squared: 0.167
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 5.568 df: 3 and 26 p-value: 0.004
##
## -- Analysis of Variance
##
## df Sum Sq Mean Sq F-value p-value
## Size 1 755179.865 755179.865 15.348 0.001
## Beds 1 7842.365 7842.365 0.159 0.693
## Baths 1 58912.405 58912.405 1.197 0.284
##
## Model 3 821934.634 273978.211 5.568 0.004
## Residuals 26 1279302.332 49203.936
## Price 29 2101236.967 72456.447
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## Price Size Beds Baths
## Price 1.00 0.60 0.21 0.51
## Size 0.60 1.00 0.45 0.64
## Beds 0.21 0.45 1.00 0.35
## Baths 0.51 0.64 0.35 1.00
##
## Tolerance VIF
## Size 0.530 1.887
## Beds 0.792 1.263
## Baths 0.582 1.717
##
## Size Beds Baths R2adj X's
## 1 0 1 0.340 2
## 1 0 0 0.337 1
## 1 1 1 0.321 3
## 1 1 0 0.316 2
## 0 0 1 0.232 1
## 0 1 1 0.206 2
## 0 1 0 0.012 1
##
## [based on Thomas Lumley's leaps function from the leaps package]
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 20, out of 30 rows of data, or do n_res_rows="all"]
## -----------------------------------------------------------------------
## Size Beds Baths Price fitted resid rstdnt dffits cooks
## 7 2436 3 2.000 1095 710.063 384.937 2.108 1.159 0.297
## 8 1375 2 1.000 699 361.532 337.468 1.791 0.944 0.205
## 21 2224 3 2.000 235 650.469 -415.469 -2.165 -0.893 0.174
## 3 2380 5 3.000 899 710.899 188.101 1.009 0.649 0.105
## 6 2131 3 2.000 268 624.327 -356.327 -1.777 -0.642 0.095
## 24 1688 4 2.000 195 466.095 -271.095 -1.322 -0.490 0.058
## 15 1464 3 3.000 300 520.816 -220.816 -1.091 -0.477 0.057
## 12 2286 3 3.000 469 751.882 -282.882 -1.372 -0.473 0.054
## 29 1828 5 2.000 368 471.746 -103.746 -0.582 -0.447 0.051
## 5 1360 2 2.000 210 441.300 -231.300 -1.125 -0.438 0.047
## 4 1868 3 3.000 929 634.381 294.619 1.415 0.433 0.045
## 10 1371 3 2.000 700 410.689 289.311 1.365 0.333 0.027
## 11 1440 3 2.000 145 430.085 -285.085 -1.337 -0.300 0.022
## 18 2479 4 3.000 929 772.431 156.569 0.750 0.290 0.021
## 17 1440 3 2.000 640 430.085 209.915 0.969 0.217 0.012
## 25 1431 3 2.500 619 469.547 149.453 0.695 0.205 0.011
## 20 1309 3 2.000 559 393.261 165.739 0.767 0.204 0.011
## 23 972 3 1.000 148 214.545 -66.545 -0.325 -0.151 0.006
## 16 1419 3 2.000 285 424.182 -139.182 -0.636 -0.146 0.005
## 28 2340 3 3.000 835 767.062 67.938 0.320 0.116 0.003
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## [to see all intervals add n_pred_rows="all"]
## ----------------------------------------------
##
## Size Beds Baths Price pred s_pred pi.lwr pi.upr width
## 20 1309 3 2.000 559 393.261 229.039 -77.537 864.058 941.595
## 29 1828 5 2.000 368 471.746 259.729 -62.135 1005.627 1067.762
## 10 1371 3 2.000 700 410.689 227.980 -57.930 879.308 937.239
## ...
## 27 1477 3 2.000 408 440.486 226.713 -25.530 906.501 932.031
## 26 1488 3 2.000 549 443.578 226.621 -22.249 909.405 931.654
## 24 1688 4 2.000 195 466.095 234.819 -16.582 948.772 965.354
## ...
## 28 2340 3 3.000 835 767.062 234.364 285.319 1248.804 963.485
## 18 2479 4 3.000 929 772.431 235.772 287.795 1257.068 969.273
## 14 2474 3 3.500 841 846.722 241.681 349.940 1343.504 993.564
##
## ----------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## ----------------------------------
# How Size influences Price: the predictor Size goes on the x-axis (first
# argument of lessR's Plot()) and the response Price on the y-axis.
Plot(Size, Price, data=home_CA, color="blue")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Price, Size, enhance=TRUE) # many options
## Plot(Price, Size, fill="skyblue") # interior fill color of points
## Plot(Price, Size, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Price, Size, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 30
## Sample Correlation of Price and Size: r = 0.599
##
## Hypothesis Test of 0 Correlation: t = 3.963, df = 28, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.305 to 0.789
##
# How Beds influences Price: the predictor Beds goes on the x-axis (first
# argument of lessR's Plot()) and the response Price on the y-axis.
Plot(Beds, Price, data=home_CA, color="green")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Price, Beds, enhance=TRUE) # many options
## Plot(Price, Beds, fill="skyblue") # interior fill color of points
## Plot(Price, Beds, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Price, Beds, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 30
## Sample Correlation of Price and Beds: r = 0.215
##
## Hypothesis Test of 0 Correlation: t = 1.163, df = 28, p-value = 0.255
## 95% Confidence Interval for Correlation: -0.158 to 0.534
##
# How Baths influences Price: the predictor Baths goes on the x-axis (first
# argument of lessR's Plot()) and the response Price on the y-axis.
Plot(Baths, Price, data=home_CA, color="purple")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Price, Baths, enhance=TRUE) # many options
## Plot(Price, Baths, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Price, Baths, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 30
## Sample Correlation of Price and Baths: r = 0.509
##
## Hypothesis Test of 0 Correlation: t = 3.127, df = 28, p-value = 0.004
## 95% Confidence Interval for Correlation: 0.182 to 0.734
##
# One-way ANOVA on the full data set: does mean Price differ across the
# levels of State?
ANOVA(Price ~ State, data = home)
##
## BACKGROUND
##
## Response Variable: Price
##
## Factor Variable: State
## Levels: CA NJ NY PA
##
## Number of cases (rows) of data: 120
## Number of cases retained for analysis: 120
##
##
## DESCRIPTIVE STATISTICS
##
## n mean sd min max
## CA 30 535.37 269.18 145.00 1095.00
## NJ 30 328.53 157.97 115.00 650.00
## NY 30 365.33 317.82 35.00 1250.00
## PA 30 265.57 137.09 50.00 550.00
##
## Grand Mean: 373.7
##
##
## ANOVA
##
## df Sum Sq Mean Sq F-value p-value
## State 3 1198168.73 399389.58 7.35 0.0001
## Residuals 116 6299266.47 54304.02
##
## R Squared: 0.160
## R Sq Adjusted: 0.138
## Omega Squared: 0.137
##
##
## Cohen's f: 0.399
##
##
## TUKEY MULTIPLE COMPARISONS OF MEANS
##
## Family-wise Confidence Level: 0.95
## -------------------------------------
## diff lwr upr p adj
## NJ-CA -206.83 -363.67 -49.99 0.00
## NY-CA -170.03 -326.87 -13.19 0.03
## PA-CA -269.80 -426.64 -112.96 0.00
## NY-NJ 36.80 -120.04 193.64 0.93
## PA-NJ -62.97 -219.81 93.87 0.72
## PA-NY -99.77 -256.61 57.07 0.35
##
##
## RESIDUALS
##
## Fitted Values, Residuals, Standardized Residuals
## [sorted by Standardized Residuals, ignoring + or - sign]
## [res_rows = 20, out of 120 cases (rows) of data, or res_rows="all"]
## ------------------------------------------
## State Price fitted residual z-resid
## 63 NY 1250 365.33 884.67 3.86
## 86 NY 1000 365.33 634.67 2.77
## 87 NY 929 365.33 563.67 2.46
## 7 CA 1095 535.37 559.63 2.44
## 88 NY 875 365.33 509.67 2.22
## 64 NY 825 365.33 459.67 2.01
## 75 NY 775 365.33 409.67 1.79
## 18 CA 929 535.37 393.63 1.72
## 4 CA 929 535.37 393.63 1.72
## 11 CA 145 535.37 -390.37 -1.70
## 23 CA 148 535.37 -387.37 -1.69
## 3 CA 899 535.37 363.63 1.59
## 22 CA 180 535.37 -355.37 -1.55
## 24 CA 195 535.37 -340.37 -1.49
## 65 NY 35 365.33 -330.33 -1.44
## 5 CA 210 535.37 -325.37 -1.42
## 54 NJ 650 328.53 321.47 1.40
## 57 NJ 639 328.53 310.47 1.36
## 14 CA 841 535.37 305.63 1.33
## 21 CA 235 535.37 -300.37 -1.31
##
## ----------------------------------------
## Plot 1: 95% family-wise confidence level
## Plot 2: Scatterplot with Cell Means
## ----------------------------------------
# Visual comparison of Price across the levels of State (full data set).
# NOTE(review): the recorded output suggests this call draws a scatterplot
# with group means rather than a true box plot -- confirm the title is apt.
Plot(
  State, Price,
  data = home,
  fill = "skyblue",
  main = "Home Prices by State (Boxplot)"
)
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(State, Price, data=home, fill="skyblue", main="Home Prices by State (Boxplot)", means=FALSE) # do not plot means
## Plot(State, Price, data=home, fill="skyblue", main="Home Prices by State (Boxplot)", stat="mean") # only plot means
## ANOVA(Price ~ State) # inferential analysis
##
## Price
## - by levels of -
## State
##
## n miss mean sd min mdn max
## CA 30 0 535.367 269.177 145.000 554.000 1095.000
## NJ 30 0 328.533 157.973 115.000 279.000 650.000
## NY 30 0 365.333 317.822 35.000 256.500 1250.000
## PA 30 0 265.567 137.089 50.000 232.500 550.000
##