## Prepare data.
# Download the Nanaimo listings CSV (first row holds the column names) and
# print the structure of the resulting data frame.
Nanaimo <- read.csv(
  file = "http://latul.be/mbaa_531/data/nanaimo.csv",
  header = TRUE
)
str(Nanaimo)
## 'data.frame': 549 obs. of 40 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ address : chr "122 - 1175 Resort Drive, Parksville" "26 - 4837 Roger Street, Port Alberni" "106c - 181 Beachside Drive, Parksville" "Lot 15 Fernwood Ave, Powell River" ...
## $ price : num 12000 19900 25000 29000 32500 33000 33500 34000 34900 35000 ...
## $ mls : int 389143 395030 393100 11249 388254 11768 373600 11448 11260 377535 ...
## $ lat : num 49.3 49.3 49.3 49.8 49.9 ...
## $ lng : num -124 -125 -124 -125 -125 ...
## $ bed : int 1 2 1 0 2 2 2 0 2 0 ...
## $ area : int 667 720 946 0 672 784 1200 0 1068 NA ...
## $ bath : int 1 1 2 0 1 1 2 0 1 0 ...
## $ type : chr "Condo/Strata" "Manufactured/Mobile" "Condo/Strata" "Lots/Acreage" ...
## $ landarea : num NA NA NA 0.45 NA NA NA 0.55 NA 0.33 ...
## $ lotwidth : int NA NA NA 75 NA NA NA NA NA 120 ...
## $ lotdepth : int NA NA NA 255 NA NA NA NA NA 120 ...
## $ water : chr "Municipal" "Municipal" "Municipal" "None" ...
## $ lotshape : chr NA NA NA "Irregular" ...
## $ sewer : chr "Connected" "Connected" "Connected" "None" ...
## $ access : chr "Street" NA NA NA ...
## $ zoning : chr "Other" "Other" "Multi-Family" "Single Family" ...
## $ mobile : chr NA "CSA Label" NA NA ...
## $ stratainfo : chr NA NA NA NA ...
## $ style : chr "Apartment" "Rancher" "Apartment" NA ...
## $ age : int NA 1975 2008 NA 1980 1983 2007 NA NA NA ...
## $ construction: chr "Frame" "Manufactured/Mobile" "Steel/Concrete" NA ...
## $ foundation : chr "Yes" "No" "Yes" NA ...
## $ exterior : chr "Wood" "Aluminum" "Hardi Plank" NA ...
## $ bsmttype : chr NA NA NA NA ...
## $ bsmtdev : chr NA NA NA NA ...
## $ insulceil : chr "Yes" "Unknown" "Yes" NA ...
## $ insulwalls : chr "Yes" "Unknown" "Yes" NA ...
## $ roof : chr "Metal" "Other" "Metal" NA ...
## $ heating : chr "Baseboard" "Forced Air" "Forced Air" NA ...
## $ fuel : chr "Electric" "Oil" "Natural Gas" NA ...
## $ aircond : chr NA NA "Central" NA ...
## $ parking : chr "Underground Garage" "1 Carport - Single" "Underground Garage" NA ...
## $ title : chr "Timeshare/Fractional Ownership" "Pad Rental" "Timeshare/Fractional Ownership" "Freehold" ...
## $ restrictions: chr NA NA NA "Unknown" ...
## $ taxes : int NA 259 NA 740 251 582 262 596 149 1063 ...
## $ taxyear : int NA 2014 NA 2014 2012 2014 2013 2014 2014 2013 ...
## $ stratafee : int 159 268 392 NA 412 NA 352 NA 312 NA ...
## $ insulation : chr NA NA NA NA ...
Identify the latitude and longitude of Woodgrove Centre (source: Worldwide Elevation Map Finder).
Latitude: 49.2366772
Longitude: -124.04945
Calculate the latitude and longitude differences from Woodgrove Centre separately, then combine them into a single straight-line (Euclidean) distance, measured in degrees.
# Reference point: Woodgrove Centre, in decimal degrees.
woodgrove_lat <- 49.2366772
woodgrove_lng <- -124.04945

# Per-axis offsets from the reference point and their Euclidean combination.
# All three columns are expressed in degrees, not in kilometres.
# NOTE(review): a degree of longitude covers less ground than a degree of
# latitude this far north, so absdist over-weights east-west offsets --
# confirm whether a great-circle distance is wanted instead.
Nanaimo$latdist <- abs(Nanaimo$lat - woodgrove_lat)
Nanaimo$lngdist <- abs(Nanaimo$lng - woodgrove_lng)
Nanaimo$absdist <- sqrt(Nanaimo$latdist^2 + Nanaimo$lngdist^2)

# Keep listings with a positive latitude, a negative longitude and a positive
# price. subset() drops rows whose condition evaluates to NA on its own and
# has no `na.rm` argument, so none is passed.
nanaimo1 <- subset(Nanaimo, lat > 0 & lng < 0 & price > 0)
# Summary statistics of the three distance measures and of price for the
# filtered listings; each call is followed by its recorded output.
with(nanaimo1, summary(latdist))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0002183 0.0768845 0.2788080 0.3213486 0.5706033 0.8430081
with(nanaimo1, summary(lngdist))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0096 0.2626 0.4668 0.5548 0.7400 1.8588
with(nanaimo1, summary(absdist))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01114 0.32164 0.73170 0.67589 0.80364 1.86092
with(nanaimo1, summary(price))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12000 159450 269900 316183 419500 998000
library(ggplot2)

# Scatter plot of price against absdist for the filtered listings; `base` is
# kept so further layers can be added to the same plot later.
base <- ggplot(nanaimo1, aes(x = absdist, y = price))
base + geom_point()

# Simple linear regression of price on absdist.
distance <- lm(price ~ absdist, data = nanaimo1)
summary(distance)
##
## Call:
## lm(formula = price ~ absdist, data = nanaimo1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -359259 -144340 -47782 100843 687347
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 398922 17384 22.948 < 2e-16 ***
## absdist -122415 22009 -5.562 4.27e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 205700 on 521 degrees of freedom
## Multiple R-squared: 0.05605, Adjusted R-squared: 0.05424
## F-statistic: 30.94 on 1 and 521 DF, p-value: 4.265e-08
# Scatter plot with the least-squares line and no confidence band. The line
# layer is added first, so the points are drawn on top of it.
base +
  geom_smooth(formula = y ~ x, method = "lm", se = FALSE) +
  geom_point()
# Listings with at least one bedroom. subset() drops rows whose condition is
# NA on its own and has no `na.rm` argument, so none is passed.
nanaimo2 <- subset(Nanaimo, bed > 0)
summary(nanaimo2$bed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.987 4.000 6.000
# Regress price on absdist and the number of bedrooms. lm() omits rows with
# missing values in any model variable (reported in the summary).
room <- lm(formula = price ~ absdist + bed, data = nanaimo2)
summary(room)
##
## Call:
## lm(formula = price ~ absdist + bed, data = nanaimo2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -347034 -137864 -36631 107529 615019
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 255676 33636 7.601 2.56e-13 ***
## absdist -109862 25436 -4.319 2.03e-05 ***
## bed 61201 9648 6.344 6.72e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 196500 on 361 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.1347, Adjusted R-squared: 0.1299
## F-statistic: 28.1 on 2 and 361 DF, p-value: 4.53e-12
# Plot the rows the `room` model was actually fitted on. model.frame() returns
# them as a plain data frame (price, absdist, bed), so the plot does not rely
# on ggplot2 implicitly converting an lm object into data.
ggplot(
  data = model.frame(room),
  mapping = aes(x = absdist, y = price, size = bed)
) +
  geom_point(alpha = .4) +
  geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
# Listings with a non-negative, non-missing bathroom count. subset() drops
# rows whose condition is NA on its own and has no `na.rm` argument, so none
# is passed.
nanaimo3 <- subset(Nanaimo, bath >= 0)
# Recode bath as a two-level factor: "1" for more than one bathroom,
# "0" otherwise.
nanaimo3$bath <- factor(ifelse(nanaimo3$bath > 1, 1, 0))
summary(nanaimo3$bath)
## 0 1
## 263 286
# Regress price on absdist and the bathroom indicator. lm() omits rows with
# missing values in any model variable (reported in the summary).
bathroom <- lm(formula = price ~ absdist + bath, data = nanaimo3)
summary(bathroom)
##
## Call:
## lm(formula = price ~ absdist + bath, data = nanaimo3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -411355 -113921 -45493 63303 681512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 285715 19102 14.957 < 2e-16 ***
## absdist -86452 20287 -4.262 2.41e-05 ***
## bath1 174793 16586 10.539 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 186900 on 520 degrees of freedom
## (26 observations deleted due to missingness)
## Multiple R-squared: 0.2222, Adjusted R-squared: 0.2192
## F-statistic: 74.27 on 2 and 520 DF, p-value: < 2.2e-16
library(ggplot2)
# Plot the rows the `bathroom` model was actually fitted on. model.frame()
# returns them as a plain data frame (price, absdist, bath), so the plot does
# not rely on ggplot2 implicitly converting an lm object into data.
# Colour is mapped to the bath factor, so one line is fitted per bath level.
ggplot(
  data = model.frame(bathroom),
  mapping = aes(x = absdist, y = price, colour = bath)
) +
  geom_point(alpha = .6) +
  geom_smooth(formula = y ~ x, method = "lm", se = FALSE)
# Combined model: price regressed on absdist, bedroom count and the two-level
# bath factor, fitted on nanaimo3.
house <- lm(price ~ absdist + bed + bath, data = nanaimo3)
summary(house)
##
## Call:
## lm(formula = price ~ absdist + bed + bath, data = nanaimo3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -371506 -113961 -39284 74656 709193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 261549 19517 13.401 < 2e-16 ***
## absdist -90906 19948 -4.557 6.47e-06 ***
## bed 32705 7286 4.489 8.83e-06 ***
## bath1 96299 23898 4.030 6.42e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 183600 on 519 degrees of freedom
## (26 observations deleted due to missingness)
## Multiple R-squared: 0.2512, Adjusted R-squared: 0.2469
## F-statistic: 58.05 on 3 and 519 DF, p-value: < 2.2e-16
# Rows the `house` model was actually fitted on, as a plain data frame
# (price, absdist, bed, bath), so the plots do not rely on ggplot2 implicitly
# converting an lm object into data.
house_data <- model.frame(house)

# Points coloured by the bath factor and sized by bedroom count, with three
# sets of fitted lines: one per bedroom count, one per bath level, and one
# overall line.
# alpha is a fixed setting, so it goes outside aes(); inside aes() it would be
# treated as data and produce a spurious legend.
# The overall line opts out of the inherited colour mapping with
# inherit.aes = FALSE, which leaves it ungrouped.
ggplot(
  data = house_data,
  mapping = aes(x = absdist, y = price, colour = bath)
) +
  geom_point(aes(size = bed), alpha = .4) +
  geom_smooth(aes(group = bed), formula = y ~ x, method = "lm", se = FALSE) +
  geom_smooth(formula = y ~ x, method = "lm", se = FALSE) +
  geom_smooth(
    mapping = aes(x = absdist, y = price),
    inherit.aes = FALSE,
    formula = y ~ x,
    method = "lm",
    se = FALSE
  )

# Same points with only the overall fitted line, plus a title.
ggplot(
  data = house_data,
  mapping = aes(x = absdist, y = price, colour = bath)
) +
  geom_point(aes(size = bed), alpha = .4) +
  geom_smooth(
    mapping = aes(x = absdist, y = price),
    inherit.aes = FALSE,
    formula = y ~ x,
    method = "lm",
    se = FALSE
  ) +
  ggtitle("Housing Price in Nanaimo")
## Conclusion
Acknowledgement: Dr. Johan Latulippe. The course content provided comprehensive knowledge that helped me learn to analyze research data using R.