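This section attaches all the standard libraries that may be referred to below during processing. Note that plyr is attached after dplyr here, which masks several dplyr verbs (see the warning near the end of the output below).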
## Warning: package 'ggplot2' was built under R version 3.4.1
## Warning: package 'ggthemes' was built under R version 3.4.1
## Warning: package 'scales' was built under R version 3.4.1
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'mice' was built under R version 3.4.2
## Loading required package: lattice
## Warning: package 'randomForest' was built under R version 3.4.1
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## Warning: package 'rpart' was built under R version 3.4.2
## Warning: package 'ROCR' was built under R version 3.4.1
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.4.1
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
## Warning: package 'rpart.plot' was built under R version 3.4.2
## Warning: package 'corrr' was built under R version 3.4.1
## Warning: package 'corrplot' was built under R version 3.4.2
## corrplot 0.84 loaded
## Warning: package 'glue' was built under R version 3.4.2
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
## Warning: package 'caTools' was built under R version 3.4.1
## Warning: package 'data.table' was built under R version 3.4.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## Loading required package: knitr
## Warning: package 'knitr' was built under R version 3.4.2
## Loading required package: geosphere
## Warning: package 'geosphere' was built under R version 3.4.2
## Loading required package: gmapsdistance
## Warning: package 'gmapsdistance' was built under R version 3.4.2
## Loading required package: tidyr
## Warning: package 'tidyr' was built under R version 3.4.2
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:mice':
##
## complete
## Warning: package 'car' was built under R version 3.4.2
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Warning: package 'caret' was built under R version 3.4.1
## Warning: package 'gclus' was built under R version 3.4.1
## Loading required package: cluster
## Warning: package 'visdat' was built under R version 3.4.1
## Warning: package 'psych' was built under R version 3.4.2
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
## The following object is masked from 'package:randomForest':
##
## outlier
## The following objects are masked from 'package:scales':
##
## alpha, rescale
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## Warning: package 'leaflet' was built under R version 3.4.1
## Warning: package 'leaflet.extras' was built under R version 3.4.1
## Warning: package 'GPArotation' was built under R version 3.4.1
## Warning: package 'MVN' was built under R version 3.4.2
## sROC 0.1-2 loaded
##
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
##
## mardia
## Warning: package 'MASS' was built under R version 3.4.1
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Warning: package 'psy' was built under R version 3.4.1
##
## Attaching package: 'psy'
## The following object is masked from 'package:psych':
##
## wkappa
## Warning: package 'corpcor' was built under R version 3.4.1
## Warning: package 'fastmatch' was built under R version 3.4.1
##
## Attaching package: 'fastmatch'
## The following object is masked from 'package:dplyr':
##
## coalesce
## Warning: package 'plyr' was built under R version 3.4.1
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
This section is meant for data loading and data visualisation. Data analysis shows that Size has outliers: any value lying more than 1.5 times the IQR beyond the quartiles is treated as an outlier. The following section also contains descriptive statistics.
Variable analysis: – The response (dependent) variable is Price. – All other variables are independent variables. – Property Size: the box plot and summary show three outliers. Since the outliers make up approximately 10% of the data, I have kept them in my analysis…
# Read the property dataset from CSV and show the structure of the result.
my_property_data <- read.csv(file = "Dataset_LeslieSalt.csv")
str(my_property_data)
## 'data.frame': 31 obs. of 8 variables:
## $ Price : num 4.5 10.6 1.7 5 5 3.3 5.7 6.2 19.4 3.2 ...
## $ County : int 1 1 0 0 0 1 1 1 1 1 ...
## $ Size : num 138.4 52 16.1 1695.2 845 ...
## $ Elevation: int 10 4 0 1 1 2 4 4 20 0 ...
## $ Sewer : int 3000 0 2640 3500 1000 10000 0 0 1300 6000 ...
## $ Date : int -103 -103 -98 -93 -92 -86 -68 -64 -63 -62 ...
## $ Flood : int 0 0 1 0 1 0 0 0 0 0 ...
## $ Distance : num 0.3 2.5 10.3 14 14 0 0 0 1.2 0 ...
# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = my_property_data)
## Price County Size Elevation
## Min. : 1.70 Min. :0.0000 Min. : 6.90 Min. : 0.000
## 1st Qu.: 5.35 1st Qu.:0.0000 1st Qu.: 20.35 1st Qu.: 2.000
## Median :11.70 Median :1.0000 Median : 51.40 Median : 4.000
## Mean :11.95 Mean :0.6129 Mean : 139.97 Mean : 4.645
## 3rd Qu.:16.05 3rd Qu.:1.0000 3rd Qu.: 104.10 3rd Qu.: 7.000
## Max. :37.20 Max. :1.0000 Max. :1695.20 Max. :20.000
## Sewer Date Flood Distance
## Min. : 0 Min. :-103.00 Min. :0.0000 Min. : 0.000
## 1st Qu.: 0 1st Qu.: -63.50 1st Qu.:0.0000 1st Qu.: 0.850
## Median : 900 Median : -59.00 Median :0.0000 Median : 4.900
## Mean : 1981 Mean : -58.65 Mean :0.1613 Mean : 5.132
## 3rd Qu.: 3450 3rd Qu.: -51.00 3rd Qu.:0.0000 3rd Qu.: 5.500
## Max. :10000 Max. : -4.00 Max. :1.0000 Max. :16.500
# All columns on one shared axis, with outlying points drawn.
boxplot(my_property_data, outline = TRUE)
# Size on its own: first with the outlying points drawn, then with them hidden.
boxplot(my_property_data[["Size"]], col = "blue", outline = TRUE)
boxplot(my_property_data[["Size"]], col = "blue", outline = FALSE)
Now let's examine the response variable and whether it follows a normal distribution. The Shapiro-Wilk normality test gives a p-value < 0.05, so the null hypothesis is rejected. Our null hypothesis is that the data are normal, hence the test indicates that Price is not normally distributed. An assumption of regression is that the residuals should be normally distributed. The Q-Q plot shows that after a log transformation the response variable becomes almost normal.
# Shapiro-Wilk test of the null hypothesis that Price is normally distributed.
# The argument expression is echoed verbatim in the "data:" line of the output.
shapiro.test(my_property_data$Price)
##
## Shapiro-Wilk normality test
##
## data: my_property_data$Price
## W = 0.90607, p-value = 0.01025
# Histogram of Price.
# NOTE(review): histogram() is presumably lattice's (lattice is attached
# above) -- confirm.
histogram(my_property_data$Price)
# Normal Q-Q plot of Price on its original scale.
qqnorm(my_property_data$Price)
# Normal Q-Q plot of log(Price), to compare against the untransformed plot.
qqnorm(log(my_property_data$Price))
A scatter plot is required to identify the pattern between the dependent variable and the independent variables. The following is the scatter plot of Price against Size. The plot does not show a linear relation; the relation appears to be logarithmic.
# Scatter plot of Price against Size, with each point filled by its County.
# Columns are referenced by bare name inside aes() so ggplot resolves them in
# `my_property_data`; County is wrapped in factor() so the 0/1 codes get a
# discrete legend. (Previously `fill = 'County'` mapped a constant string, so
# every point received the same fill.)
PropPrice <- ggplot(
  my_property_data,
  aes(x = Size, y = Price, fill = factor(County))
) +
  geom_point(shape = 21) +
  theme_bw() +
  ggtitle("Property Price Relationship based on County") +
  labs(x = "Area Size", y = "Price", fill = "County") +
  theme(legend.position = "bottom", legend.direction = "horizontal")
PropPrice
The scatter plot of Price against property Distance does show a slight relation: the price seems to increase up to a distance of about 5 km and then decrease again.
# Scatter plot of Price against Distance, with each point filled by its County.
# Columns are referenced by bare name inside aes() so ggplot resolves them in
# `my_property_data`; County is wrapped in factor() so the 0/1 codes get a
# discrete legend. (Previously `fill = 'County'` mapped a constant string, so
# every point received the same fill.)
PropPriceDistance <- ggplot(
  my_property_data,
  aes(x = Distance, y = Price, fill = factor(County))
) +
  geom_point(shape = 21) +
  theme_bw() +
  ggtitle("Property Price Relationship based on Distance") +
  labs(x = "Distance", y = "Price", fill = "County") +
  theme(legend.position = "bottom", legend.direction = "horizontal")
PropPriceDistance
The scatter plot of Price against Elevation shows a somewhat linear relationship, and we will treat Elevation as a factor influencing the price.
# Scatter plot of Price against Elevation, with each point filled by its
# County. Columns are referenced by bare name inside aes() so ggplot resolves
# them in `my_property_data`; County is wrapped in factor() so the 0/1 codes
# get a discrete legend. (Previously `fill = 'County'` mapped a constant
# string, and the title was copied from the Distance plot.)
PropPriceElevation <- ggplot(
  my_property_data,
  aes(x = Elevation, y = Price, fill = factor(County))
) +
  geom_point(shape = 21) +
  theme_bw() +
  ggtitle("Property Price Relationship based on Elevation") +
  labs(x = "Elevation", y = "Price", fill = "County") +
  theme(legend.position = "bottom", legend.direction = "horizontal")
PropPriceElevation
The scatter plot of Price against Sewer shows a negative relation: as the sewer distance increases, the price decreases. So this variable should be considered as an independent variable for the regression.
# Scatter plot of Price against Sewer, with each point filled by its County.
# Columns are referenced by bare name inside aes() so ggplot resolves them in
# `my_property_data`; County is wrapped in factor() so the 0/1 codes get a
# discrete legend. (Previously `fill = 'County'` mapped a constant string, and
# the title was copied from the Distance plot.)
PropPriceSewer <- ggplot(
  my_property_data,
  aes(x = Sewer, y = Price, fill = factor(County))
) +
  geom_point(shape = 21) +
  theme_bw() +
  ggtitle("Property Price Relationship based on Sewer") +
  labs(x = "Sewer", y = "Price", fill = "County") +
  theme(legend.position = "bottom", legend.direction = "horizontal")
PropPriceSewer
The box plot shows that the price is higher for the areas where Flood is 0. So Flood is a factor influencing the property price and hence will be included as a predictor in the regression analysis.
# Price distribution for each Flood level (0 / 1).
# The formula uses bare column names resolved through `data`, and the axes are
# labelled explicitly so the plot is readable regardless of R version defaults.
boxplot(
  Price ~ Flood,
  data = my_property_data,
  outline = TRUE,
  col = "blue",
  xlab = "Flood",
  ylab = "Price"
)
Correlation analysis is required to identify whether there are relationships among the variables. Some interesting observations from the correlation matrix: – Date and Price are correlated. This makes sense, as property prices increase as time passes. So the data here are time-dependent and we need to be careful with this. – Flood and County are correlated, and it seems that one county may be flood-prone. – Elevation and County seem to be correlated.
# Pairwise correlations between all columns (cor() with its default settings).
property_data.cormatrix <- cor(x = my_property_data)
# Rounded copy (three decimals) used only for printing.
property_data.cormatrix_rounded <- round(property_data.cormatrix, digits = 3)
print(property_data.cormatrix_rounded)
## Price County Size Elevation Sewer Date Flood Distance
## Price 1.000 -0.182 -0.240 0.352 -0.391 0.595 -0.323 0.093
## County -0.182 1.000 -0.339 0.475 -0.050 -0.370 -0.552 -0.742
## Size -0.240 -0.339 1.000 -0.209 0.053 -0.349 0.109 0.557
## Elevation 0.352 0.475 -0.209 1.000 -0.359 -0.057 -0.373 -0.362
## Sewer -0.391 -0.050 0.053 -0.359 1.000 -0.151 -0.113 -0.159
## Date 0.595 -0.370 -0.349 -0.057 -0.151 1.000 0.015 0.044
## Flood -0.323 -0.552 0.109 -0.373 -0.113 0.015 1.000 0.423
## Distance 0.093 -0.742 0.557 -0.362 -0.159 0.044 0.423 1.000
# Shaded plot of the full (unrounded) correlation matrix, with the
# coefficients overlaid in blue and the variables ordered by "AOE".
corrplot(
  property_data.cormatrix,
  method = "shade",
  type = "full",
  order = "AOE",
  addCoef.col = "blue",
  bg = "grey"
)
Regression analysis assumption validation. The selected independent variables are: – Size – County – Elevation – Sewer – Flood – Distance
# First model: Price regressed on Size, County, Elevation, Sewer, Flood and
# Distance, all on their original scale.
# NOTE(review): every term is written as my_property_data$<col>, so the `data`
# argument is not what resolves them and predict() with `newdata` would not
# pick up new values. Bare column names (Price ~ Size + ...) are the idiomatic
# form; the call is left unchanged here because summary() echoes the call and
# term labels verbatim.
mypropertylmRev <- lm(my_property_data$Price ~ my_property_data$Size + my_property_data$County + my_property_data$Elevation + my_property_data$Sewer + my_property_data$Flood + my_property_data$Distance, data=my_property_data)
summary(mypropertylmRev)
##
## Call:
## lm(formula = my_property_data$Price ~ my_property_data$Size +
## my_property_data$County + my_property_data$Elevation + my_property_data$Sewer +
## my_property_data$Flood + my_property_data$Distance, data = my_property_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0024 -2.8881 0.0511 2.0732 11.3472
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.326e+01 3.984e+00 5.840 5.06e-06 ***
## my_property_data$Size -8.919e-03 3.220e-03 -2.770 0.01065 *
## my_property_data$County -1.284e+01 2.943e+00 -4.362 0.00021 ***
## my_property_data$Elevation 4.908e-01 2.480e-01 1.979 0.05943 .
## my_property_data$Sewer -1.203e-03 4.092e-04 -2.939 0.00717 **
## my_property_data$Flood -1.416e+01 2.841e+00 -4.985 4.32e-05 ***
## my_property_data$Distance 3.736e-02 3.425e-01 0.109 0.91405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.618 on 24 degrees of freedom
## Multiple R-squared: 0.7134, Adjusted R-squared: 0.6417
## F-statistic: 9.956 on 6 and 24 DF, p-value: 1.513e-05
Let's run the regression again after eliminating the Distance variable and compare the adjusted R-squared. This is done because Distance and County are highly correlated (−0.742), and having both will affect the model.
# Second model: same predictors as before but without Distance.
# NOTE(review): terms are written as my_property_data$<col>, which makes the
# `data` argument redundant; left unchanged because summary() echoes the call
# and term labels verbatim.
mypropertylm <- lm(my_property_data$Price ~ my_property_data$Size + my_property_data$County + my_property_data$Elevation + my_property_data$Sewer + my_property_data$Flood , data=my_property_data)
summary(mypropertylm)
##
## Call:
## lm(formula = my_property_data$Price ~ my_property_data$Size +
## my_property_data$County + my_property_data$Elevation + my_property_data$Sewer +
## my_property_data$Flood, data = my_property_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0675 -2.8304 -0.0121 2.0759 11.2908
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.361e+01 2.414e+00 9.779 5.03e-10 ***
## my_property_data$Size -8.738e-03 2.706e-03 -3.229 0.00346 **
## my_property_data$County -1.304e+01 2.235e+00 -5.835 4.38e-06 ***
## my_property_data$Elevation 4.870e-01 2.407e-01 2.023 0.05387 .
## my_property_data$Sewer -1.219e-03 3.724e-04 -3.274 0.00310 **
## my_property_data$Flood -1.416e+01 2.784e+00 -5.088 2.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.525 on 25 degrees of freedom
## Multiple R-squared: 0.7132, Adjusted R-squared: 0.6559
## F-statistic: 12.44 on 5 and 25 DF, p-value: 3.995e-06
Now we try again by removing Elevation, which has a p-value slightly > 0.05, but this reduces the adjusted R-squared. Also, Elevation's correlations with the other selected variables are at most moderate (the largest is 0.475, with County). But we have not yet tried the variable transformation that is suggested by the curved relationship between property price and property size.
# Third model: Size, County, Sewer and Flood only (Elevation dropped).
# NOTE(review): terms are written as my_property_data$<col>, which makes the
# `data` argument redundant; left unchanged because summary() echoes the call
# and term labels verbatim.
mypropertylm3rd <- lm(my_property_data$Price ~ my_property_data$Size + my_property_data$County + my_property_data$Sewer + my_property_data$Flood , data=my_property_data)
summary(mypropertylm3rd)
##
## Call:
## lm(formula = my_property_data$Price ~ my_property_data$Size +
## my_property_data$County + my_property_data$Sewer + my_property_data$Flood,
## data = my_property_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5610 -2.7807 -0.6888 2.4832 11.3671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.597e+01 2.234e+00 11.624 8.46e-12 ***
## my_property_data$Size -9.132e-03 2.855e-03 -3.198 0.003617 **
## my_property_data$County -1.174e+01 2.265e+00 -5.185 2.06e-05 ***
## my_property_data$Sewer -1.534e-03 3.579e-04 -4.286 0.000221 ***
## my_property_data$Flood -1.553e+01 2.857e+00 -5.436 1.07e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.787 on 26 degrees of freedom
## Multiple R-squared: 0.6663, Adjusted R-squared: 0.615
## F-statistic: 12.98 on 4 and 26 DF, p-value: 6.148e-06
The following regression model was tried with a variable transformation: here we took the log of the property size. This seems to have improved the adjusted R-squared considerably (from 0.656 to 0.736), and this will be our final model. Our residual standard error has also come down…
# Fourth model: as the second model, but with Size entered as log(Size).
# NOTE(review): terms are written as my_property_data$<col>, which makes the
# `data` argument redundant; left unchanged because summary() echoes the call
# and term labels verbatim.
mypropertylm4th <- lm(my_property_data$Price ~ log(my_property_data$Size) + my_property_data$County + my_property_data$Elevation + my_property_data$Sewer + my_property_data$Flood , data=my_property_data)
summary(mypropertylm4th)
##
## Call:
## lm(formula = my_property_data$Price ~ log(my_property_data$Size) +
## my_property_data$County + my_property_data$Elevation + my_property_data$Sewer +
## my_property_data$Flood, data = my_property_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.033 -2.524 -1.235 1.729 8.587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.346e+01 3.403e+00 9.834 4.49e-10 ***
## log(my_property_data$Size) -2.836e+00 6.158e-01 -4.606 0.000104 ***
## my_property_data$County -1.306e+01 1.927e+00 -6.777 4.21e-07 ***
## my_property_data$Elevation 5.662e-01 2.103e-01 2.692 0.012479 *
## my_property_data$Sewer -1.503e-03 3.323e-04 -4.523 0.000128 ***
## my_property_data$Flood -1.288e+01 2.424e+00 -5.314 1.66e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.962 on 25 degrees of freedom
## Multiple R-squared: 0.7802, Adjusted R-squared: 0.7362
## F-statistic: 17.74 on 5 and 25 DF, p-value: 1.628e-07
Now we will add the time variable and see how that affects the predictive power of the model. It looks like the model's adjusted R-squared has increased to 75% from the earlier 73%. An interesting aspect here is that the Date coefficient has a p-value of about 0.10 (> 0.05), and thus we fail to reject the null hypothesis that Date has no impact on price. But due to the correlation of Date with the other variables, the model's fit has increased. So we will stay with mypropertylm4th.
# ---- Disabled draft: regression after replacing the Size outliers ----
# The lines below are commented out and never run; the original heading did
# not say what the outliers were to be replaced with.
# NOTE(review): quantile(..., c(0.05, 0.95)) returns two values, so assigning
# it to a new column would not give one value per row -- confirm intent before
# re-enabling.
# my_property_data_new <- my_property_data
#
# my_property_data_new$RevisedSize <- quantile(my_property_data_new$Size,c(0.05, 0.95))
#
# boxplot(my_property_data_new$RevisedSize, outline= TRUE, col = "blue")
# boxplot(my_property_data$Size, outline= FALSE, col = "blue")
# Fifth model: the log(Size) model with Date added as an extra predictor.
# NOTE(review): terms are written as my_property_data$<col>, which makes the
# `data` argument redundant; left unchanged because summary() echoes the call
# and term labels verbatim.
mypropertylm5th <- lm(my_property_data$Price ~ log(my_property_data$Size) + my_property_data$County + my_property_data$Elevation + my_property_data$Sewer + my_property_data$Flood + my_property_data$Date, data=my_property_data)
summary(mypropertylm5th)
##
## Call:
## lm(formula = my_property_data$Price ~ log(my_property_data$Size) +
## my_property_data$County + my_property_data$Elevation + my_property_data$Sewer +
## my_property_data$Flood + my_property_data$Date, data = my_property_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.041 -2.225 -1.320 1.682 9.957
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.306e+01 3.291e+00 10.046 4.50e-10 ***
## log(my_property_data$Size) -2.261e+00 6.845e-01 -3.304 0.002986 **
## my_property_data$County -1.086e+01 2.267e+00 -4.791 7.07e-05 ***
## my_property_data$Elevation 5.544e-01 2.030e-01 2.731 0.011638 *
## my_property_data$Sewer -1.313e-03 3.397e-04 -3.866 0.000739 ***
## my_property_data$Flood -1.165e+01 2.448e+00 -4.759 7.66e-05 ***
## my_property_data$Date 6.331e-02 3.743e-02 1.691 0.103734
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.823 on 24 degrees of freedom
## Multiple R-squared: 0.8036, Adjusted R-squared: 0.7545
## F-statistic: 16.36 on 6 and 24 DF, p-value: 2.013e-07
Earlier we had some outliers in Size and we decided to proceed with them, as the outliers were approximately 10% of the total data. The slope of the fit between Price and Size seems to be affected by them. To better understand the implications of the outliers, I am going to compare the fit of a simple linear regression model on my dataset with and without outliers.
Please note the change in the slope of the best-fit line after removing the outliers of the Size variable. If we used the outliers to train the model (left chart), our predictions would be exaggerated (high error) for larger values of Size because of the larger slope. This shows the impact of the outliers.
We will now run the regression on the new dataset from which the outliers have been removed, and compare the model's predictive power with the earlier ones. Removing the outliers only very marginally increases it (adjusted R-squared 0.739 versus 0.736).
# Work on a copy so the original data frame stays untouched.
my_property_data_new <- my_property_data
# Draw the Size box plot and keep the statistics object that boxplot() returns.
sizeboxplot.outlietr <- boxplot(
  my_property_data[["Size"]],
  col = "blue",
  outline = TRUE
)
# `out` holds the Size values that the box plot flagged as outliers.
sizeboxplot.outlietr[["out"]]
## [1] 1695.2 845.0 320.6
# Copy every row whose Size was NOT flagged as an outlier into a separate data
# frame. Rows are matched against the flagged values themselves, so the result
# no longer depends on the smallest outlier happening to sit at position 3 of
# `sizeboxplot.outlietr$out`, and it also works when nothing is flagged.
my_property_data_new_outlier <- subset(
  my_property_data_new,
  !(Size %in% sizeboxplot.outlietr$out),
  select = Price:Distance
)
# Re-draw the Size box plot on the filtered data to check what remains.
sizeboxplot.outlietr_removed <- boxplot(
  my_property_data_new_outlier$Size,
  outline = TRUE,
  col = "blue"
)
##
# Side-by-side scatter plots of Price against Size (left: all rows, right:
# outliers removed), each with its simple least-squares line overlaid.
# par() returns the previous settings; they are restored afterwards so later
# plots are not drawn into the two-panel layout.
old_par <- par(mfrow = c(1, 2))
plot(
  my_property_data_new$Size, my_property_data_new$Price,
  ylim = c(0, 60), main = "With Outliers",
  xlab = "Size", ylab = "Price", pch = "*", col = "red", cex = 2
)
# Bare column names are resolved through `data`, instead of df$col terms.
abline(
  lm(Price ~ Size, data = my_property_data_new),
  col = "blue", lwd = 3, lty = 2
)
plot(
  my_property_data_new_outlier$Size, my_property_data_new_outlier$Price,
  ylim = c(0, 60), main = "Without Outliers",
  xlab = "Size", ylab = "Price", pch = "*", col = "red", cex = 2
)
abline(
  lm(Price ~ Size, data = my_property_data_new_outlier),
  col = "blue", lwd = 3, lty = 2
)
par(old_par)
# Sixth model: the log(Size) specification (County, Elevation, Sewer, Flood)
# refitted on the data frame with the Size outliers removed.
# NOTE(review): terms are written as my_property_data_new_outlier$<col>, which
# makes the `data` argument redundant; left unchanged because summary() echoes
# the call and term labels verbatim.
mypropertylm6th <- lm(my_property_data_new_outlier$Price ~ log(my_property_data_new_outlier$Size) + my_property_data_new_outlier$County + my_property_data_new_outlier$Elevation + my_property_data_new_outlier$Sewer + my_property_data_new_outlier$Flood , data=my_property_data_new_outlier)
summary(mypropertylm6th)
##
## Call:
## lm(formula = my_property_data_new_outlier$Price ~ log(my_property_data_new_outlier$Size) +
## my_property_data_new_outlier$County + my_property_data_new_outlier$Elevation +
## my_property_data_new_outlier$Sewer + my_property_data_new_outlier$Flood,
## data = my_property_data_new_outlier)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1613 -2.5590 -0.9847 1.2886 8.1038
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.453e+01 4.087e+00 8.448
## log(my_property_data_new_outlier$Size) -3.063e+00 9.953e-01 -3.077
## my_property_data_new_outlier$County -1.332e+01 2.091e+00 -6.371
## my_property_data_new_outlier$Elevation 5.723e-01 2.205e-01 2.595
## my_property_data_new_outlier$Sewer -1.513e-03 3.524e-04 -4.293
## my_property_data_new_outlier$Flood -1.416e+01 2.878e+00 -4.921
## Pr(>|t|)
## (Intercept) 2.36e-08 ***
## log(my_property_data_new_outlier$Size) 0.005507 **
## my_property_data_new_outlier$County 2.07e-06 ***
## my_property_data_new_outlier$Elevation 0.016524 *
## my_property_data_new_outlier$Sewer 0.000295 ***
## my_property_data_new_outlier$Flood 6.39e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.032 on 22 degrees of freedom
## Multiple R-squared: 0.7873, Adjusted R-squared: 0.739
## F-statistic: 16.29 on 5 and 22 DF, p-value: 9.374e-07