#REGRESIÓN LINEAL MULTIPLE EN R
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Para utilizar la funcion one_hot()
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:xts':
##
## first, last
library(mltools)
##
## Attaching package: 'mltools'
## The following object is masked from 'package:PerformanceAnalytics':
##
## skewness
#Para transformar a Camel CASE
library(tools)
#Para utilizar kable
library(knitr)
#Para el diagrama de dispersión
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(MASS)
library(ggfortify)
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first(), xts::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last(), xts::last()
## ✖ tidyr::replace_na() masks mltools::replace_na()
## ✖ dplyr::select() masks MASS::select()
## ✖ dplyr::src() masks Hmisc::src()
## ✖ dplyr::summarize() masks Hmisc::summarize()
## ✖ purrr::transpose() masks data.table::transpose()
##RECONOCIMIENTO DEL DATASET: CALIFORNIA HOUSING PRICES
Tenemos los datos del precio de las casas para diferentes distritos en California.
Cada fila representa un distrito.
Tenemos 10 atributos:
-Longitud
-Latitud
-Edad media de la casa
-Número de habitaciones
-Número de camas
-Población del distrito
-Hogares en el distrito
-Ingresos medios
-Proximidad al Oceano
#LEEMOS EL DATASET
df <- read.csv('housing.csv')
plot_map = ggplot(df,
aes(x = longitude, y = latitude, color = median_house_value
)) +
geom_point(aes(size = population), alpha = 0.4) +
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Mapa de Longitud y Latitud : Precio Medio de las Casas") +
scale_color_distiller(palette = "Spectral") +
labs(color = "Median House Value ($USD)", size = "Población")
plot_map
head(df)
## longitude latitude housing_median_age total_rooms total_bedrooms population
## 1 -122.23 37.88 41 880 129 322
## 2 -122.22 37.86 21 7099 1106 2401
## 3 -122.24 37.85 52 1467 190 496
## 4 -122.25 37.85 52 1274 235 558
## 5 -122.25 37.85 52 1627 280 565
## 6 -122.25 37.85 52 919 213 413
## households median_income median_house_value ocean_proximity
## 1 126 8.3252 452600 NEAR BAY
## 2 1138 8.3014 358500 NEAR BAY
## 3 177 7.2574 352100 NEAR BAY
## 4 219 5.6431 341300 NEAR BAY
## 5 259 3.8462 342200 NEAR BAY
## 6 193 4.0368 269700 NEAR BAY
str(df)
## 'data.frame': 20640 obs. of 10 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.9 37.9 37.9 ...
## $ housing_median_age: num 41 21 52 52 52 52 52 52 42 52 ...
## $ total_rooms : num 880 7099 1467 1274 1627 ...
## $ total_bedrooms : num 129 1106 190 235 280 ...
## $ population : num 322 2401 496 558 565 ...
## $ households : num 126 1138 177 219 259 ...
## $ median_income : num 8.33 8.3 7.26 5.64 3.85 ...
## $ median_house_value: num 452600 358500 352100 341300 342200 ...
## $ ocean_proximity : chr "NEAR BAY" "NEAR BAY" "NEAR BAY" "NEAR BAY" ...
summary(df)
## longitude latitude housing_median_age total_rooms
## Min. :-124.3 Min. :32.54 Min. : 1.00 Min. : 2
## 1st Qu.:-121.8 1st Qu.:33.93 1st Qu.:18.00 1st Qu.: 1448
## Median :-118.5 Median :34.26 Median :29.00 Median : 2127
## Mean :-119.6 Mean :35.63 Mean :28.64 Mean : 2636
## 3rd Qu.:-118.0 3rd Qu.:37.71 3rd Qu.:37.00 3rd Qu.: 3148
## Max. :-114.3 Max. :41.95 Max. :52.00 Max. :39320
##
## total_bedrooms population households median_income
## Min. : 1.0 Min. : 3 Min. : 1.0 Min. : 0.4999
## 1st Qu.: 296.0 1st Qu.: 787 1st Qu.: 280.0 1st Qu.: 2.5634
## Median : 435.0 Median : 1166 Median : 409.0 Median : 3.5348
## Mean : 537.9 Mean : 1425 Mean : 499.5 Mean : 3.8707
## 3rd Qu.: 647.0 3rd Qu.: 1725 3rd Qu.: 605.0 3rd Qu.: 4.7432
## Max. :6445.0 Max. :35682 Max. :6082.0 Max. :15.0001
## NA's :207
## median_house_value ocean_proximity
## Min. : 14999 Length:20640
## 1st Qu.:119600 Class :character
## Median :179700 Mode :character
## Mean :206856
## 3rd Qu.:264725
## Max. :500001
##
hist(df$median_house_value, breaks=100, main="Y : Precio Medio de las Casas", border="darkgoldenrod2", col="darkblue")
par(mfrow = c(3,3))
hist(df$longitude, breaks = 140, main = "longitude", border="darkorange", col="dodgerblue")
hist(df$latitude, breaks = 140, main = "latitude", border="darkorange", col="dodgerblue")
hist(df$housing_median_age, breaks = 140, main = "housing_median_age", border="darkorange", col="dodgerblue")
hist(df$total_rooms, breaks = 140, main = "total_rooms", border="darkorange", col="dodgerblue")
hist(df$total_bedrooms, breaks = 140, main = "total_bedrooms", border="darkorange", col="dodgerblue")
hist(df$population, breaks = 140, main = "population", border="darkorange", col="dodgerblue")
hist(df$households, breaks = 140, main = "households", border="darkorange", col="dodgerblue")
hist(df$median_income, breaks = 140, main = "median_income", border="darkorange", col="dodgerblue")
hist(df$median_house_value, breaks = 140, main = "median_house_value", border="darkorange", col="dodgerblue")
## OBSERVEMOS UNA DIAGRAMA DE DISPERSIÓN DE TODOS LOS DATOS EXCEPTO
LONGITUD Y LATITUD
Z = df[,-1:-2]
ggpairs(Z, columns=1:7, ggplot2::aes(colour=ocean_proximity), progress = FALSE)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 207 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 207 rows containing missing values
## Warning: Removed 207 rows containing missing values (geom_point).
## Removed 207 rows containing missing values (geom_point).
## Warning: Removed 207 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 207 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 207 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 207 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 207 rows containing missing values
## Warning: Removed 207 rows containing missing values (geom_point).
## Removed 207 rows containing missing values (geom_point).
## Removed 207 rows containing missing values (geom_point).
## Removed 207 rows containing missing values (geom_point).
Z = df[, -10]
Z <- na.omit(Z)
corrmatrix = cor(Z)
t(corrmatrix)
## longitude latitude housing_median_age total_rooms
## longitude 1.00000000 -0.92461611 -0.10935655 0.04548017
## latitude -0.92461611 1.00000000 0.01189907 -0.03666681
## housing_median_age -0.10935655 0.01189907 1.00000000 -0.36062830
## total_rooms 0.04548017 -0.03666681 -0.36062830 1.00000000
## total_bedrooms 0.06960802 -0.06698283 -0.32045104 0.93037950
## population 0.10027030 -0.10899734 -0.29578730 0.85728125
## households 0.05651277 -0.07177419 -0.30276797 0.91899153
## median_income -0.01555015 -0.07962632 -0.11827772 0.19788152
## median_house_value -0.04539822 -0.14463821 0.10643205 0.13329413
## total_bedrooms population households median_income
## longitude 0.06960802 0.100270301 0.05651277 -0.015550150
## latitude -0.06698283 -0.108997344 -0.07177419 -0.079626319
## housing_median_age -0.32045104 -0.295787297 -0.30276797 -0.118277723
## total_rooms 0.93037950 0.857281251 0.91899153 0.197881519
## total_bedrooms 1.00000000 0.877746743 0.97972827 -0.007722850
## population 0.87774674 1.000000000 0.90718590 0.005086624
## households 0.97972827 0.907185900 1.00000000 0.013433892
## median_income -0.00772285 0.005086624 0.01343389 1.000000000
## median_house_value 0.04968618 -0.025299732 0.06489355 0.688355475
## median_house_value
## longitude -0.04539822
## latitude -0.14463821
## housing_median_age 0.10643205
## total_rooms 0.13329413
## total_bedrooms 0.04968618
## population -0.02529973
## households 0.06489355
## median_income 0.68835548
## median_house_value 1.00000000
par(mfrow = c(2,3))
plot( df$housing_median_age, df$median_house_value)
plot(df$total_rooms,df$median_house_value)
plot(df$total_bedrooms, df$median_house_value)
plot(df$population, df$median_house_value)
plot(df$households, df$median_house_value)
plot(df$median_income, df$median_house_value)
summary(df$ocean_proximity)
## Length Class Mode
## 20640 character character
ggplot(df, aes(x = factor(ocean_proximity))) + geom_bar(stat = "count", color = "black", fill = "black")+xlab("categoria") +
ylab("n") +
ggtitle("Proximidad al Oceano")
## PROBLEMAS DE DATOS CONCENTRADOS EN UN VALOR PARA ALGUNAS
CARACTERISTICAS
par(mfrow = c(1,3))
plot(df$median_house_value~df$median_income, main="MEDIAN_INCOME")
hist(df$median_income, breaks = 140, main = "MEDIAN_INCOME", border="chartreuse3", col="dodgerblue")
out_median_income=boxplot(df$households, main="MEDIAN_INCOME")
par(mfrow = c(1,3))
plot(df$median_house_value~df$total_rooms, main="TOTAL_ROOMS")
hist(df$median_income, breaks = 140, main = "TOTAL_ROOMS", border="chartreuse4", col="darkred")
out_total_rooms=boxplot(df$total_rooms, main="TOTAL_ROOMS")
par(mfrow = c(1,3))
plot(df$median_house_value~df$total_bedrooms, main="TOTAL_BEDROOMS")
hist(df$median_income, breaks = 140, main = "TOTAL_BEDROOMS", border="chartreuse4", col="darkred")
out_total_bedrooms= boxplot(df$total_bedrooms, main="TOTAL_BEDROOMS")
par(mfrow = c(1,3))
plot(df$median_house_value~df$population, main="POPULATION")
hist(df$median_income, breaks = 140, main = "POPULATION", border="chartreuse4", col="darkred")
out_population= boxplot(df$population, main="POPULATION")
par(mfrow = c(1,3))
plot(df$median_house_value~df$households, main="HOUSEHOLDS")
hist(df$households, breaks = 140, main = "HOUSEHOLDS",border="chartreuse4", col="darkred")
out_households= boxplot(df$households, main="HOUSEHOLDS")
par(mfrow = c(1,2))
hist(df$median_house_value, breaks = 140, main = "MEDIAN_HOUSE_VALUE", border="chartreuse3", col="dodgerblue")
out_median_value=boxplot(df$median_house_value, main="MEDIAN_HOUSE_VALUE")
str(df)
## 'data.frame': 20640 obs. of 10 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.9 37.9 37.9 ...
## $ housing_median_age: num 41 21 52 52 52 52 52 52 42 52 ...
## $ total_rooms : num 880 7099 1467 1274 1627 ...
## $ total_bedrooms : num 129 1106 190 235 280 ...
## $ population : num 322 2401 496 558 565 ...
## $ households : num 126 1138 177 219 259 ...
## $ median_income : num 8.33 8.3 7.26 5.64 3.85 ...
## $ median_house_value: num 452600 358500 352100 341300 342200 ...
## $ ocean_proximity : chr "NEAR BAY" "NEAR BAY" "NEAR BAY" "NEAR BAY" ...
df$ocean_proximity = as.factor(df$ocean_proximity)
df<- one_hot(as.data.table(df))
colnames(df)[10] <- "ocean_proximity_1H_OCEAN"
colnames(df)[13] <- "ocean_proximity_NEAR_BAY"
colnames(df)[14] <- "ocean_proximity_NEAR_OCEAN"
df <- na.omit(df)
str(df)
## Classes 'data.table' and 'data.frame': 20433 obs. of 14 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.9 37.9 37.9 ...
## $ housing_median_age : num 41 21 52 52 52 52 52 52 42 52 ...
## $ total_rooms : num 880 7099 1467 1274 1627 ...
## $ total_bedrooms : num 129 1106 190 235 280 ...
## $ population : num 322 2401 496 558 565 ...
## $ households : num 126 1138 177 219 259 ...
## $ median_income : num 8.33 8.3 7.26 5.64 3.85 ...
## $ median_house_value : num 452600 358500 352100 341300 342200 ...
## $ ocean_proximity_1H_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_INLAND : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_ISLAND : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_NEAR_BAY : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ocean_proximity_NEAR_OCEAN: int 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
cdf=df #clean_data_frame
#cdf_lon_lat=df #clean_data_frame
#QUITAMOS LOS NAN's
cdf <- na.omit(cdf)
#QUITAMOS 'ISLANDS'
cdf<-cdf[,-12]
#QUITMOS LONGITUD Y LATITUD
#cdf <- cdf[,-1:-2]
#REMOVEMOS ALGUNOS OUTLAYERS
cdf<-cdf[cdf$median_house_value <500000, ]
cdf<-cdf[cdf$median_income <15, ]
cdf<-cdf[cdf$housing_median_age <49, ]
# QUITAMOS LOS OUTLAYERS
#cdf<- cdf[-which(cdf$total_rooms%in% out_total_rooms),]
#cdf<- cdf[-which(cdf$total_bedrooms%in% out_total_bedrooms),]
#cdf<- cdf[-which(cdf$population%in% out_population),]
#cdf<- cdf[-which(cdf$households%in% out_households),]
#cdf<- cdf[-which(cdf$median_income%in% out_median_income),]
#cdf<- cdf[-which(cdf$median_house_value%in% out_median_value),]
str(cdf)
## Classes 'data.table' and 'data.frame': 18071 obs. of 13 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.8 37.9 37.9 ...
## $ housing_median_age : num 41 21 42 40 42 41 48 48 43 40 ...
## $ total_rooms : num 880 7099 2555 751 1639 ...
## $ total_bedrooms : num 129 1106 665 184 367 ...
## $ population : num 322 2401 1206 409 929 ...
## $ households : num 126 1138 595 166 366 ...
## $ median_income : num 8.33 8.3 2.08 1.36 1.71 ...
## $ median_house_value : num 452600 358500 226700 147500 159800 ...
## $ ocean_proximity_1H_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_INLAND : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_NEAR_BAY : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ocean_proximity_NEAR_OCEAN: int 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
cdf$rooms_per_household <- cdf$total_rooms/ cdf$households
cdf$bedrooms_per_room= cdf$total_bedrooms/ cdf$total_rooms
cdf$population_per_household=cdf$population/ cdf$households
cdf$log_total_rooms=log(cdf$total_rooms)
cdf$log_total_bedrooms=log(cdf$total_bedrooms)
cdf$log_population=log(cdf$population)
cdf$log_households=log(cdf$households)
cdf$log_median_income=log(cdf$median_income)
cdf$log_median_house_value=log(cdf$median_house_value)
cdf$log_rooms_per_household <- cdf$log_total_rooms/ cdf$log_households
cdf$log_bedrooms_per_room= cdf$log_total_bedrooms/ cdf$log_total_rooms
cdf$log_population_per_household=cdf$log_population/ cdf$log_households
cdf2=cdf
cdf2$median_house_value=NULL
cdf$log_median_house_value=NULL
str(cdf)
## Classes 'data.table' and 'data.frame': 18071 obs. of 24 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.8 37.9 37.9 ...
## $ housing_median_age : num 41 21 42 40 42 41 48 48 43 40 ...
## $ total_rooms : num 880 7099 2555 751 1639 ...
## $ total_bedrooms : num 129 1106 665 184 367 ...
## $ population : num 322 2401 1206 409 929 ...
## $ households : num 126 1138 595 166 366 ...
## $ median_income : num 8.33 8.3 2.08 1.36 1.71 ...
## $ median_house_value : num 452600 358500 226700 147500 159800 ...
## $ ocean_proximity_1H_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_INLAND : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_NEAR_BAY : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ocean_proximity_NEAR_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ rooms_per_household : num 6.98 6.24 4.29 4.52 4.48 ...
## $ bedrooms_per_room : num 0.147 0.156 0.26 0.245 0.224 ...
## $ population_per_household : num 2.56 2.11 2.03 2.46 2.54 ...
## $ log_total_rooms : num 6.78 8.87 7.85 6.62 7.4 ...
## $ log_total_bedrooms : num 4.86 7.01 6.5 5.21 5.91 ...
## $ log_population : num 5.77 7.78 7.1 6.01 6.83 ...
## $ log_households : num 4.84 7.04 6.39 5.11 5.9 ...
## $ log_median_income : num 2.119 2.116 0.733 0.306 0.539 ...
## $ log_rooms_per_household : num 1.4 1.26 1.23 1.3 1.25 ...
## $ log_bedrooms_per_room : num 0.717 0.79 0.828 0.788 0.798 ...
## $ log_population_per_household: num 1.19 1.11 1.11 1.18 1.16 ...
## - attr(*, ".internal.selfref")=<externalptr>
corrmatrix = cor(cdf)
t(corrmatrix)
## longitude latitude housing_median_age
## longitude 1.0000000000 -0.922364020 -0.02308773
## latitude -0.9223640199 1.000000000 -0.06172033
## housing_median_age -0.0230877337 -0.061720327 1.00000000
## total_rooms 0.0336844030 -0.026127082 -0.37476300
## total_bedrooms 0.0630945239 -0.063519194 -0.33130972
## population 0.0888992366 -0.105798593 -0.27718377
## households 0.0493879355 -0.069338040 -0.31045185
## median_income -0.0186060804 -0.074272054 -0.19793210
## median_house_value -0.0232645793 -0.172011621 0.01381800
## ocean_proximity_1H_OCEAN 0.2854194785 -0.428637043 0.11762475
## ocean_proximity_INLAND -0.0922112705 0.386166117 -0.21969804
## ocean_proximity_NEAR_BAY -0.4117854923 0.307068376 0.12337611
## ocean_proximity_NEAR_OCEAN 0.0421879553 -0.162919971 0.03724211
## rooms_per_household -0.0384494147 0.130983264 -0.17876200
## bedrooms_per_room 0.1157156990 -0.143056445 0.14426166
## population_per_household 0.0004556728 0.002566498 0.02155349
## log_total_rooms 0.0209708958 -0.024666645 -0.32481705
## log_total_bedrooms 0.0576674469 -0.068843342 -0.27763584
## log_population 0.1030874510 -0.141221893 -0.21160782
## log_households 0.0532614785 -0.091996515 -0.24197414
## log_median_income -0.0198907447 -0.075912691 -0.18403576
## log_rooms_per_household -0.0700990514 0.151488923 -0.05157560
## log_bedrooms_per_room 0.1105947173 -0.134860118 -0.02612265
## log_population_per_household 0.0623846343 -0.046976566 0.10572349
## total_rooms total_bedrooms population
## longitude 0.0336844030 0.063094524 0.08889924
## latitude -0.0261270816 -0.063519194 -0.10579859
## housing_median_age -0.3747630026 -0.331309723 -0.27718377
## total_rooms 1.0000000000 0.935422796 0.86023006
## total_bedrooms 0.9354227956 1.000000000 0.88021200
## population 0.8602300574 0.880212004 1.00000000
## households 0.9224226021 0.978746198 0.91068245
## median_income 0.2227087874 0.020802496 0.04015941
## median_house_value 0.1532913001 0.079970469 0.02227064
## ocean_proximity_1H_OCEAN -0.0162807691 0.012381030 0.06951887
## ocean_proximity_INLAND 0.0263535919 -0.013435002 -0.03799835
## ocean_proximity_NEAR_BAY 0.0003676967 -0.003761993 -0.03433719
## ocean_proximity_NEAR_OCEAN -0.0134742604 0.003929836 -0.02140617
## rooms_per_household 0.1397245138 0.013422080 -0.07445637
## bedrooms_per_room -0.1875404824 0.076759152 0.03409933
## population_per_household -0.0241525895 -0.027629035 0.06532818
## log_total_rooms 0.7940511233 0.771069090 0.69415683
## log_total_bedrooms 0.7506468980 0.817471436 0.72221616
## log_population 0.6868776974 0.725579093 0.79772220
## log_households 0.7314149724 0.789293297 0.73601806
## log_median_income 0.2322828171 0.042326951 0.05389188
## log_rooms_per_household -0.1963873711 -0.327411813 -0.35015483
## log_bedrooms_per_room 0.2734459719 0.491888221 0.41129794
## log_population_per_household -0.2756666873 -0.317412823 -0.09722667
## households median_income median_house_value
## longitude 0.049387935 -0.018606080 -0.02326458
## latitude -0.069338040 -0.074272054 -0.17201162
## housing_median_age -0.310451853 -0.197932095 0.01381800
## total_rooms 0.922422602 0.222708787 0.15329130
## total_bedrooms 0.978746198 0.020802496 0.07997047
## population 0.910682455 0.040159411 0.02227064
## households 1.000000000 0.045468585 0.09948996
## median_income 0.045468585 1.000000000 0.66577485
## median_house_value 0.099489960 0.665774849 1.00000000
## ocean_proximity_1H_OCEAN 0.038132019 0.184800134 0.32103851
## ocean_proximity_INLAND -0.047228557 -0.228050323 -0.50301677
## ocean_proximity_NEAR_BAY 0.005665539 0.067867661 0.11481952
## ocean_proximity_NEAR_OCEAN 0.006090457 -0.005749940 0.14500129
## rooms_per_household -0.078929107 0.311129787 0.11363592
## bedrooms_per_room 0.058160385 -0.634558982 -0.23391359
## population_per_household -0.026534027 0.025866833 -0.01991006
## log_total_rooms 0.769172585 0.234052911 0.17626147
## log_total_bedrooms 0.808594866 0.009043832 0.08548597
## log_population 0.759431749 0.037795960 0.02962732
## log_households 0.815401150 0.042968702 0.11003767
## log_median_income 0.067472361 0.958447829 0.62987265
## log_rooms_per_household -0.384828841 0.256478728 0.05214824
## log_bedrooms_per_room 0.474746017 -0.478212118 -0.15192829
## log_population_per_household -0.313887439 -0.016494389 -0.15963422
## ocean_proximity_1H_OCEAN ocean_proximity_INLAND
## longitude 0.285419479 -0.09221127
## latitude -0.428637043 0.38616612
## housing_median_age 0.117624750 -0.21969804
## total_rooms -0.016280769 0.02635359
## total_bedrooms 0.012381030 -0.01343500
## population 0.069518865 -0.03799835
## households 0.038132019 -0.04722856
## median_income 0.184800134 -0.22805032
## median_house_value 0.321038511 -0.50301677
## ocean_proximity_1H_OCEAN 1.000000000 -0.65541248
## ocean_proximity_INLAND -0.655412478 1.00000000
## ocean_proximity_NEAR_BAY -0.267518416 -0.21501585
## ocean_proximity_NEAR_OCEAN -0.341321517 -0.27433452
## rooms_per_household -0.126176671 0.18085036
## bedrooms_per_room 0.104289188 -0.14222788
## population_per_household -0.002092085 0.01133422
## log_total_rooms 0.009729217 -0.01091605
## log_total_bedrooms 0.038781199 -0.05283844
## log_population 0.116629610 -0.09485921
## log_households 0.073411382 -0.09864180
## log_median_income 0.185238287 -0.23232746
## log_rooms_per_household -0.144206718 0.19766611
## log_bedrooms_per_room 0.094314045 -0.12904802
## log_population_per_household 0.033718204 0.04757230
## ocean_proximity_NEAR_BAY
## longitude -0.4117854923
## latitude 0.3070683763
## housing_median_age 0.1233761099
## total_rooms 0.0003676967
## total_bedrooms -0.0037619932
## population -0.0343371915
## households 0.0056655389
## median_income 0.0678676612
## median_house_value 0.1148195192
## ocean_proximity_1H_OCEAN -0.2675184160
## ocean_proximity_INLAND -0.2150158524
## ocean_proximity_NEAR_BAY 1.0000000000
## ocean_proximity_NEAR_OCEAN -0.1119745800
## rooms_per_household -0.0201352195
## bedrooms_per_room -0.0210359696
## population_per_household -0.0120023879
## log_total_rooms -0.0007979923
## log_total_bedrooms -0.0080782466
## log_population -0.0378127369
## log_households 0.0036637247
## log_median_income 0.0633516305
## log_rooms_per_household -0.0113660622
## log_bedrooms_per_room -0.0232309601
## log_population_per_household -0.0682139322
## ocean_proximity_NEAR_OCEAN rooms_per_household
## longitude 0.042187955 -0.038449415
## latitude -0.162919971 0.130983264
## housing_median_age 0.037242109 -0.178761998
## total_rooms -0.013474260 0.139724514
## total_bedrooms 0.003929836 0.013422080
## population -0.021406165 -0.074456368
## households 0.006090457 -0.078929107
## median_income -0.005749940 0.311129787
## median_house_value 0.145001289 0.113635921
## ocean_proximity_1H_OCEAN -0.341321517 -0.126176671
## ocean_proximity_INLAND -0.274334523 0.180850360
## ocean_proximity_NEAR_BAY -0.111974580 -0.020135220
## ocean_proximity_NEAR_OCEAN 1.000000000 -0.053486583
## rooms_per_household -0.053486583 1.000000000
## bedrooms_per_room 0.064356659 -0.415270347
## population_per_household -0.003237739 -0.007216777
## log_total_rooms 0.002008713 0.139160080
## log_total_bedrooms 0.024397651 0.001579552
## log_population -0.007586525 -0.177676942
## log_households 0.028601981 -0.162566947
## log_median_income 0.003446060 0.295219340
## log_rooms_per_household -0.057864508 0.659298701
## log_bedrooms_per_room 0.062433286 -0.298376578
## log_population_per_household -0.062813422 0.038223326
## bedrooms_per_room population_per_household
## longitude 0.1157156990 0.0004556728
## latitude -0.1430564446 0.0025664977
## housing_median_age 0.1442616560 0.0215534933
## total_rooms -0.1875404824 -0.0241525895
## total_bedrooms 0.0767591524 -0.0276290346
## population 0.0340993331 0.0653281812
## households 0.0581603852 -0.0265340274
## median_income -0.6345589816 0.0258668333
## median_house_value -0.2339135880 -0.0199100594
## ocean_proximity_1H_OCEAN 0.1042891883 -0.0020920846
## ocean_proximity_INLAND -0.1422278798 0.0113342227
## ocean_proximity_NEAR_BAY -0.0210359696 -0.0120023879
## ocean_proximity_NEAR_OCEAN 0.0643566593 -0.0032377391
## rooms_per_household -0.4152703470 -0.0072167769
## bedrooms_per_room 1.0000000000 0.0043603587
## population_per_household 0.0043603587 1.0000000000
## log_total_rooms -0.2477146086 -0.0835735056
## log_total_bedrooms 0.0617609405 -0.0846318913
## log_population 0.0172628437 0.0417694650
## log_households 0.0321968961 -0.0810461733
## log_median_income -0.6489189054 0.0145874010
## log_rooms_per_household -0.4207453003 0.0706099308
## log_bedrooms_per_room 0.7211657237 -0.0853377761
## log_population_per_household -0.0005781063 0.5528646835
## log_total_rooms log_total_bedrooms log_population
## longitude 0.0209708958 0.057667447 0.103087451
## latitude -0.0246666450 -0.068843342 -0.141221893
## housing_median_age -0.3248170511 -0.277635842 -0.211607821
## total_rooms 0.7940511233 0.750646898 0.686877697
## total_bedrooms 0.7710690903 0.817471436 0.725579093
## population 0.6941568286 0.722216158 0.797722200
## households 0.7691725851 0.808594866 0.759431749
## median_income 0.2340529114 0.009043832 0.037795960
## median_house_value 0.1762614681 0.085485972 0.029627317
## ocean_proximity_1H_OCEAN 0.0097292173 0.038781199 0.116629610
## ocean_proximity_INLAND -0.0109160491 -0.052838437 -0.094859210
## ocean_proximity_NEAR_BAY -0.0007979923 -0.008078247 -0.037812737
## ocean_proximity_NEAR_OCEAN 0.0020087127 0.024397651 -0.007586525
## rooms_per_household 0.1391600797 0.001579552 -0.177676942
## bedrooms_per_room -0.2477146086 0.061760940 0.017262844
## population_per_household -0.0835735056 -0.084631891 0.041769465
## log_total_rooms 1.0000000000 0.949123739 0.863753047
## log_total_bedrooms 0.9491237386 1.000000000 0.895265226
## log_population 0.8637530465 0.895265226 1.000000000
## log_households 0.9326880798 0.972298862 0.933558108
## log_median_income 0.2671505080 0.045589068 0.069285155
## log_rooms_per_household -0.3705381193 -0.520693264 -0.601721079
## log_bedrooms_per_room 0.4350723248 0.686033805 0.586311406
## log_population_per_household -0.4906483419 -0.511370349 -0.230411960
## log_households log_median_income
## longitude 0.053261479 -0.01989074
## latitude -0.091996515 -0.07591269
## housing_median_age -0.241974142 -0.18403576
## total_rooms 0.731414972 0.23228282
## total_bedrooms 0.789293297 0.04232695
## population 0.736018062 0.05389188
## households 0.815401150 0.06747236
## median_income 0.042968702 0.95844783
## median_house_value 0.110037671 0.62987265
## ocean_proximity_1H_OCEAN 0.073411382 0.18523829
## ocean_proximity_INLAND -0.098641795 -0.23232746
## ocean_proximity_NEAR_BAY 0.003663725 0.06335163
## ocean_proximity_NEAR_OCEAN 0.028601981 0.00344606
## rooms_per_household -0.162566947 0.29521934
## bedrooms_per_room 0.032196896 -0.64891891
## population_per_household -0.081046173 0.01458740
## log_total_rooms 0.932688080 0.26715051
## log_total_bedrooms 0.972298862 0.04558907
## log_population 0.933558108 0.06928515
## log_households 1.000000000 0.08102491
## log_median_income 0.081024906 1.00000000
## log_rooms_per_household -0.630269747 0.22346051
## log_bedrooms_per_room 0.645308828 -0.45100734
## log_population_per_household -0.507470925 -0.04566184
## log_rooms_per_household log_bedrooms_per_room
## longitude -0.07009905 0.11059472
## latitude 0.15148892 -0.13486012
## housing_median_age -0.05157560 -0.02612265
## total_rooms -0.19638737 0.27344597
## total_bedrooms -0.32741181 0.49188822
## population -0.35015483 0.41129794
## households -0.38482884 0.47474602
## median_income 0.25647873 -0.47821212
## median_house_value 0.05214824 -0.15192829
## ocean_proximity_1H_OCEAN -0.14420672 0.09431404
## ocean_proximity_INLAND 0.19766611 -0.12904802
## ocean_proximity_NEAR_BAY -0.01136606 -0.02323096
## ocean_proximity_NEAR_OCEAN -0.05786451 0.06243329
## rooms_per_household 0.65929870 -0.29837658
## bedrooms_per_room -0.42074530 0.72116572
## population_per_household 0.07060993 -0.08533778
## log_total_rooms -0.37053812 0.43507232
## log_total_bedrooms -0.52069326 0.68603380
## log_population -0.60172108 0.58631141
## log_households -0.63026975 0.64530883
## log_median_income 0.22346051 -0.45100734
## log_rooms_per_household 1.00000000 -0.69434825
## log_bedrooms_per_room -0.69434825 1.00000000
## log_population_per_household 0.46168538 -0.39208473
## log_population_per_household
## longitude 0.0623846343
## latitude -0.0469765656
## housing_median_age 0.1057234910
## total_rooms -0.2756666873
## total_bedrooms -0.3174128230
## population -0.0972266706
## households -0.3138874388
## median_income -0.0164943895
## median_house_value -0.1596342228
## ocean_proximity_1H_OCEAN 0.0337182043
## ocean_proximity_INLAND 0.0475723024
## ocean_proximity_NEAR_BAY -0.0682139322
## ocean_proximity_NEAR_OCEAN -0.0628134216
## rooms_per_household 0.0382233257
## bedrooms_per_room -0.0005781063
## population_per_household 0.5528646835
## log_total_rooms -0.4906483419
## log_total_bedrooms -0.5113703493
## log_population -0.2304119597
## log_households -0.5074709249
## log_median_income -0.0456618362
## log_rooms_per_household 0.4616853762
## log_bedrooms_per_room -0.3920847272
## log_population_per_household 1.0000000000
corrmatrix = cor(cdf2)
t(corrmatrix)
## longitude latitude housing_median_age
## longitude 1.0000000000 -0.922364020 -0.023087734
## latitude -0.9223640199 1.000000000 -0.061720327
## housing_median_age -0.0230877337 -0.061720327 1.000000000
## total_rooms 0.0336844030 -0.026127082 -0.374763003
## total_bedrooms 0.0630945239 -0.063519194 -0.331309723
## population 0.0888992366 -0.105798593 -0.277183767
## households 0.0493879355 -0.069338040 -0.310451853
## median_income -0.0186060804 -0.074272054 -0.197932095
## ocean_proximity_1H_OCEAN 0.2854194785 -0.428637043 0.117624750
## ocean_proximity_INLAND -0.0922112705 0.386166117 -0.219698042
## ocean_proximity_NEAR_BAY -0.4117854923 0.307068376 0.123376110
## ocean_proximity_NEAR_OCEAN 0.0421879553 -0.162919971 0.037242109
## rooms_per_household -0.0384494147 0.130983264 -0.178761998
## bedrooms_per_room 0.1157156990 -0.143056445 0.144261656
## population_per_household 0.0004556728 0.002566498 0.021553493
## log_total_rooms 0.0209708958 -0.024666645 -0.324817051
## log_total_bedrooms 0.0576674469 -0.068843342 -0.277635842
## log_population 0.1030874510 -0.141221893 -0.211607821
## log_households 0.0532614785 -0.091996515 -0.241974142
## log_median_income -0.0198907447 -0.075912691 -0.184035763
## log_median_house_value 0.0028206135 -0.217282976 -0.002693287
## log_rooms_per_household -0.0700990514 0.151488923 -0.051575601
## log_bedrooms_per_room 0.1105947173 -0.134860118 -0.026122647
## log_population_per_household 0.0623846343 -0.046976566 0.105723491
## total_rooms total_bedrooms population
## longitude 0.0336844030 0.063094524 0.08889924
## latitude -0.0261270816 -0.063519194 -0.10579859
## housing_median_age -0.3747630026 -0.331309723 -0.27718377
## total_rooms 1.0000000000 0.935422796 0.86023006
## total_bedrooms 0.9354227956 1.000000000 0.88021200
## population 0.8602300574 0.880212004 1.00000000
## households 0.9224226021 0.978746198 0.91068245
## median_income 0.2227087874 0.020802496 0.04015941
## ocean_proximity_1H_OCEAN -0.0162807691 0.012381030 0.06951887
## ocean_proximity_INLAND 0.0263535919 -0.013435002 -0.03799835
## ocean_proximity_NEAR_BAY 0.0003676967 -0.003761993 -0.03433719
## ocean_proximity_NEAR_OCEAN -0.0134742604 0.003929836 -0.02140617
## rooms_per_household 0.1397245138 0.013422080 -0.07445637
## bedrooms_per_room -0.1875404824 0.076759152 0.03409933
## population_per_household -0.0241525895 -0.027629035 0.06532818
## log_total_rooms 0.7940511233 0.771069090 0.69415683
## log_total_bedrooms 0.7506468980 0.817471436 0.72221616
## log_population 0.6868776974 0.725579093 0.79772220
## log_households 0.7314149724 0.789293297 0.73601806
## log_median_income 0.2322828171 0.042326951 0.05389188
## log_median_house_value 0.1652561028 0.100893149 0.05226457
## log_rooms_per_household -0.1963873711 -0.327411813 -0.35015483
## log_bedrooms_per_room 0.2734459719 0.491888221 0.41129794
## log_population_per_household -0.2756666873 -0.317412823 -0.09722667
## households median_income
## longitude 0.049387935 -0.018606080
## latitude -0.069338040 -0.074272054
## housing_median_age -0.310451853 -0.197932095
## total_rooms 0.922422602 0.222708787
## total_bedrooms 0.978746198 0.020802496
## population 0.910682455 0.040159411
## households 1.000000000 0.045468585
## median_income 0.045468585 1.000000000
## ocean_proximity_1H_OCEAN 0.038132019 0.184800134
## ocean_proximity_INLAND -0.047228557 -0.228050323
## ocean_proximity_NEAR_BAY 0.005665539 0.067867661
## ocean_proximity_NEAR_OCEAN 0.006090457 -0.005749940
## rooms_per_household -0.078929107 0.311129787
## bedrooms_per_room 0.058160385 -0.634558982
## population_per_household -0.026534027 0.025866833
## log_total_rooms 0.769172585 0.234052911
## log_total_bedrooms 0.808594866 0.009043832
## log_population 0.759431749 0.037795960
## log_households 0.815401150 0.042968702
## log_median_income 0.067472361 0.958447829
## log_median_house_value 0.122602210 0.657736638
## log_rooms_per_household -0.384828841 0.256478728
## log_bedrooms_per_room 0.474746017 -0.478212118
## log_population_per_household -0.313887439 -0.016494389
## ocean_proximity_1H_OCEAN ocean_proximity_INLAND
## longitude 0.285419479 -0.09221127
## latitude -0.428637043 0.38616612
## housing_median_age 0.117624750 -0.21969804
## total_rooms -0.016280769 0.02635359
## total_bedrooms 0.012381030 -0.01343500
## population 0.069518865 -0.03799835
## households 0.038132019 -0.04722856
## median_income 0.184800134 -0.22805032
## ocean_proximity_1H_OCEAN 1.000000000 -0.65541248
## ocean_proximity_INLAND -0.655412478 1.00000000
## ocean_proximity_NEAR_BAY -0.267518416 -0.21501585
## ocean_proximity_NEAR_OCEAN -0.341321517 -0.27433452
## rooms_per_household -0.126176671 0.18085036
## bedrooms_per_room 0.104289188 -0.14222788
## population_per_household -0.002092085 0.01133422
## log_total_rooms 0.009729217 -0.01091605
## log_total_bedrooms 0.038781199 -0.05283844
## log_population 0.116629610 -0.09485921
## log_households 0.073411382 -0.09864180
## log_median_income 0.185238287 -0.23232746
## log_median_house_value 0.382798776 -0.56963529
## log_rooms_per_household -0.144206718 0.19766611
## log_bedrooms_per_room 0.094314045 -0.12904802
## log_population_per_household 0.033718204 0.04757230
## ocean_proximity_NEAR_BAY
## longitude -0.4117854923
## latitude 0.3070683763
## housing_median_age 0.1233761099
## total_rooms 0.0003676967
## total_bedrooms -0.0037619932
## population -0.0343371915
## households 0.0056655389
## median_income 0.0678676612
## ocean_proximity_1H_OCEAN -0.2675184160
## ocean_proximity_INLAND -0.2150158524
## ocean_proximity_NEAR_BAY 1.0000000000
## ocean_proximity_NEAR_OCEAN -0.1119745800
## rooms_per_household -0.0201352195
## bedrooms_per_room -0.0210359696
## population_per_household -0.0120023879
## log_total_rooms -0.0007979923
## log_total_bedrooms -0.0080782466
## log_population -0.0378127369
## log_households 0.0036637247
## log_median_income 0.0633516305
## log_median_house_value 0.1184683725
## log_rooms_per_household -0.0113660622
## log_bedrooms_per_room -0.0232309601
## log_population_per_household -0.0682139322
## ocean_proximity_NEAR_OCEAN rooms_per_household
## longitude 0.042187955 -0.038449415
## latitude -0.162919971 0.130983264
## housing_median_age 0.037242109 -0.178761998
## total_rooms -0.013474260 0.139724514
## total_bedrooms 0.003929836 0.013422080
## population -0.021406165 -0.074456368
## households 0.006090457 -0.078929107
## median_income -0.005749940 0.311129787
## ocean_proximity_1H_OCEAN -0.341321517 -0.126176671
## ocean_proximity_INLAND -0.274334523 0.180850360
## ocean_proximity_NEAR_BAY -0.111974580 -0.020135220
## ocean_proximity_NEAR_OCEAN 1.000000000 -0.053486583
## rooms_per_household -0.053486583 1.000000000
## bedrooms_per_room 0.064356659 -0.415270347
## population_per_household -0.003237739 -0.007216777
## log_total_rooms 0.002008713 0.139160080
## log_total_bedrooms 0.024397651 0.001579552
## log_population -0.007586525 -0.177676942
## log_households 0.028601981 -0.162566947
## log_median_income 0.003446060 0.295219340
## log_median_house_value 0.145005058 0.098044494
## log_rooms_per_household -0.057864508 0.659298701
## log_bedrooms_per_room 0.062433286 -0.298376578
## log_population_per_household -0.062813422 0.038223326
## bedrooms_per_room population_per_household
## longitude 0.1157156990 0.0004556728
## latitude -0.1430564446 0.0025664977
## housing_median_age 0.1442616560 0.0215534933
## total_rooms -0.1875404824 -0.0241525895
## total_bedrooms 0.0767591524 -0.0276290346
## population 0.0340993331 0.0653281812
## households 0.0581603852 -0.0265340274
## median_income -0.6345589816 0.0258668333
## ocean_proximity_1H_OCEAN 0.1042891883 -0.0020920846
## ocean_proximity_INLAND -0.1422278798 0.0113342227
## ocean_proximity_NEAR_BAY -0.0210359696 -0.0120023879
## ocean_proximity_NEAR_OCEAN 0.0643566593 -0.0032377391
## rooms_per_household -0.4152703470 -0.0072167769
## bedrooms_per_room 1.0000000000 0.0043603587
## population_per_household 0.0043603587 1.0000000000
## log_total_rooms -0.2477146086 -0.0835735056
## log_total_bedrooms 0.0617609405 -0.0846318913
## log_population 0.0172628437 0.0417694650
## log_households 0.0321968961 -0.0810461733
## log_median_income -0.6489189054 0.0145874010
## log_median_house_value -0.2182050354 -0.0188189580
## log_rooms_per_household -0.4207453003 0.0706099308
## log_bedrooms_per_room 0.7211657237 -0.0853377761
## log_population_per_household -0.0005781063 0.5528646835
## log_total_rooms log_total_bedrooms log_population
## longitude 0.0209708958 0.057667447 0.103087451
## latitude -0.0246666450 -0.068843342 -0.141221893
## housing_median_age -0.3248170511 -0.277635842 -0.211607821
## total_rooms 0.7940511233 0.750646898 0.686877697
## total_bedrooms 0.7710690903 0.817471436 0.725579093
## population 0.6941568286 0.722216158 0.797722200
## households 0.7691725851 0.808594866 0.759431749
## median_income 0.2340529114 0.009043832 0.037795960
## ocean_proximity_1H_OCEAN 0.0097292173 0.038781199 0.116629610
## ocean_proximity_INLAND -0.0109160491 -0.052838437 -0.094859210
## ocean_proximity_NEAR_BAY -0.0007979923 -0.008078247 -0.037812737
## ocean_proximity_NEAR_OCEAN 0.0020087127 0.024397651 -0.007586525
## rooms_per_household 0.1391600797 0.001579552 -0.177676942
## bedrooms_per_room -0.2477146086 0.061760940 0.017262844
## population_per_household -0.0835735056 -0.084631891 0.041769465
## log_total_rooms 1.0000000000 0.949123739 0.863753047
## log_total_bedrooms 0.9491237386 1.000000000 0.895265226
## log_population 0.8637530465 0.895265226 1.000000000
## log_households 0.9326880798 0.972298862 0.933558108
## log_median_income 0.2671505080 0.045589068 0.069285155
## log_median_house_value 0.1945087697 0.110101047 0.067248042
## log_rooms_per_household -0.3705381193 -0.520693264 -0.601721079
## log_bedrooms_per_room 0.4350723248 0.686033805 0.586311406
## log_population_per_household -0.4906483419 -0.511370349 -0.230411960
## log_households log_median_income
## longitude 0.053261479 -0.01989074
## latitude -0.091996515 -0.07591269
## housing_median_age -0.241974142 -0.18403576
## total_rooms 0.731414972 0.23228282
## total_bedrooms 0.789293297 0.04232695
## population 0.736018062 0.05389188
## households 0.815401150 0.06747236
## median_income 0.042968702 0.95844783
## ocean_proximity_1H_OCEAN 0.073411382 0.18523829
## ocean_proximity_INLAND -0.098641795 -0.23232746
## ocean_proximity_NEAR_BAY 0.003663725 0.06335163
## ocean_proximity_NEAR_OCEAN 0.028601981 0.00344606
## rooms_per_household -0.162566947 0.29521934
## bedrooms_per_room 0.032196896 -0.64891891
## population_per_household -0.081046173 0.01458740
## log_total_rooms 0.932688080 0.26715051
## log_total_bedrooms 0.972298862 0.04558907
## log_population 0.933558108 0.06928515
## log_households 1.000000000 0.08102491
## log_median_income 0.081024906 1.00000000
## log_median_house_value 0.138879114 0.65700179
## log_rooms_per_household -0.630269747 0.22346051
## log_bedrooms_per_room 0.645308828 -0.45100734
## log_population_per_household -0.507470925 -0.04566184
## log_median_house_value log_rooms_per_household
## longitude 0.002820613 -0.07009905
## latitude -0.217282976 0.15148892
## housing_median_age -0.002693287 -0.05157560
## total_rooms 0.165256103 -0.19638737
## total_bedrooms 0.100893149 -0.32741181
## population 0.052264569 -0.35015483
## households 0.122602210 -0.38482884
## median_income 0.657736638 0.25647873
## ocean_proximity_1H_OCEAN 0.382798776 -0.14420672
## ocean_proximity_INLAND -0.569635289 0.19766611
## ocean_proximity_NEAR_BAY 0.118468373 -0.01136606
## ocean_proximity_NEAR_OCEAN 0.145005058 -0.05786451
## rooms_per_household 0.098044494 0.65929870
## bedrooms_per_room -0.218205035 -0.42074530
## population_per_household -0.018818958 0.07060993
## log_total_rooms 0.194508770 -0.37053812
## log_total_bedrooms 0.110101047 -0.52069326
## log_population 0.067248042 -0.60172108
## log_households 0.138879114 -0.63026975
## log_median_income 0.657001793 0.22346051
## log_median_house_value 1.000000000 0.02077848
## log_rooms_per_household 0.020778480 1.00000000
## log_bedrooms_per_room -0.125049088 -0.69434825
## log_population_per_household -0.156609388 0.46168538
## log_bedrooms_per_room log_population_per_household
## longitude 0.11059472 0.0623846343
## latitude -0.13486012 -0.0469765656
## housing_median_age -0.02612265 0.1057234910
## total_rooms 0.27344597 -0.2756666873
## total_bedrooms 0.49188822 -0.3174128230
## population 0.41129794 -0.0972266706
## households 0.47474602 -0.3138874388
## median_income -0.47821212 -0.0164943895
## ocean_proximity_1H_OCEAN 0.09431404 0.0337182043
## ocean_proximity_INLAND -0.12904802 0.0475723024
## ocean_proximity_NEAR_BAY -0.02323096 -0.0682139322
## ocean_proximity_NEAR_OCEAN 0.06243329 -0.0628134216
## rooms_per_household -0.29837658 0.0382233257
## bedrooms_per_room 0.72116572 -0.0005781063
## population_per_household -0.08533778 0.5528646835
## log_total_rooms 0.43507232 -0.4906483419
## log_total_bedrooms 0.68603380 -0.5113703493
## log_population 0.58631141 -0.2304119597
## log_households 0.64530883 -0.5074709249
## log_median_income -0.45100734 -0.0456618362
## log_median_house_value -0.12504909 -0.1566093882
## log_rooms_per_household -0.69434825 0.4616853762
## log_bedrooms_per_room 1.00000000 -0.3920847272
## log_population_per_household -0.39208473 1.0000000000
sdf=df #clean_data_frame
summary(sdf)
## longitude latitude housing_median_age total_rooms
## Min. :-124.3 Min. :32.54 Min. : 1.00 Min. : 2
## 1st Qu.:-121.8 1st Qu.:33.93 1st Qu.:18.00 1st Qu.: 1450
## Median :-118.5 Median :34.26 Median :29.00 Median : 2127
## Mean :-119.6 Mean :35.63 Mean :28.63 Mean : 2636
## 3rd Qu.:-118.0 3rd Qu.:37.72 3rd Qu.:37.00 3rd Qu.: 3143
## Max. :-114.3 Max. :41.95 Max. :52.00 Max. :39320
## total_bedrooms population households median_income
## Min. : 1.0 Min. : 3 Min. : 1.0 Min. : 0.4999
## 1st Qu.: 296.0 1st Qu.: 787 1st Qu.: 280.0 1st Qu.: 2.5637
## Median : 435.0 Median : 1166 Median : 409.0 Median : 3.5365
## Mean : 537.9 Mean : 1425 Mean : 499.4 Mean : 3.8712
## 3rd Qu.: 647.0 3rd Qu.: 1722 3rd Qu.: 604.0 3rd Qu.: 4.7440
## Max. :6445.0 Max. :35682 Max. :6082.0 Max. :15.0001
## median_house_value ocean_proximity_1H_OCEAN ocean_proximity_INLAND
## Min. : 14999 Min. :0.0000 Min. :0.0000
## 1st Qu.:119500 1st Qu.:0.0000 1st Qu.:0.0000
## Median :179700 Median :0.0000 Median :0.0000
## Mean :206864 Mean :0.4421 Mean :0.3179
## 3rd Qu.:264700 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :500001 Max. :1.0000 Max. :1.0000
## ocean_proximity_ISLAND ocean_proximity_NEAR_BAY ocean_proximity_NEAR_OCEAN
## Min. :0.0000000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000000 Median :0.0000 Median :0.0000
## Mean :0.0002447 Mean :0.1111 Mean :0.1286
## 3rd Qu.:0.0000000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000000 Max. :1.0000 Max. :1.0000
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
#QUITAMOS LOS NAN's
#QUITAMOS 'ISLANDS'
sdf<-sdf[,-12]
#QUITMOS LONGITUD Y LATITUD
sdf <- sdf[,-1:-2]
#REMOVEMOS ALGUNOS OUTLAYERS
sdf<-sdf[sdf$median_house_value <500000, ]
sdf<-sdf[sdf$median_income <15, ]
sdf<-sdf[sdf$housing_median_age <49, ]
sdf$rooms_per_household <- sdf$total_rooms/ sdf$households
sdf$bedrooms_per_room= sdf$total_bedrooms/ sdf$total_rooms
sdf$population_per_household=sdf$population/ sdf$households
sdf$median_house_value=rescale(sdf$median_house_value,to=c(1,15))
sdf$housing_median_age=rescale(sdf$housing_median_age,to=c(1,15))
sdf$total_rooms=rescale(sdf$total_rooms,to=c(1,15))
sdf$total_bedrooms=rescale(sdf$total_bedrooms,to=c(1,15))
sdf$population=rescale(sdf$population,to=c(1,15))
sdf$households=rescale(sdf$households,to=c(1,15))
sdf$rooms_per_household=rescale(sdf$rooms_per_household,to=c(1,15))
sdf$bedrooms_per_room=rescale(sdf$bedrooms_per_room,to=c(1,15))
sdf$population_per_household=rescale(sdf$population_per_household,to=c(1,15))
sdf$log_total_rooms=log(sdf$total_rooms)
sdf$log_total_bedrooms=log(sdf$total_bedrooms)
sdf$log_population=log(sdf$population)
sdf$log_households=log(sdf$households)
sdf$log_median_income=log(sdf$median_income)
sdf$log_median_house_value=
sdf$log_rooms_per_household <- log(sdf$rooms_per_household)
sdf$log_bedrooms_per_room= log(sdf$bedrooms_per_room)
sdf$log_population_per_household=log(sdf$population_per_household)
sdf2=sdf
sdf2$median_house_value=NULL
sdf$log_median_house_value=NULL
summary(sdf)
## housing_median_age total_rooms total_bedrooms population
## Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 5.766 1st Qu.: 1.520 1st Qu.: 1.650 1st Qu.: 1.319
## Median : 8.745 Median : 1.766 Median : 1.958 Median : 1.474
## Mean : 8.627 Mean : 1.955 Mean : 2.192 Mean : 1.579
## 3rd Qu.:11.128 3rd Qu.: 2.139 3rd Qu.: 2.432 3rd Qu.: 1.700
## Max. :15.000 Max. :15.000 Max. :15.000 Max. :15.000
## households median_income median_house_value ocean_proximity_1H_OCEAN
## Min. : 1.000 Min. : 0.4999 Min. : 1.000 Min. :0.0000
## 1st Qu.: 1.652 1st Qu.: 2.5429 1st Qu.: 3.901 1st Qu.:0.0000
## Median : 1.956 Median : 3.4821 Median : 5.514 Median :0.0000
## Mean : 2.172 Mean : 3.7014 Mean : 6.037 Mean :0.4492
## 3rd Qu.: 2.414 3rd Qu.: 4.6250 3rd Qu.: 7.584 3rd Qu.:1.0000
## Max. :15.000 Max. :13.1477 Max. :15.000 Max. :1.0000
## ocean_proximity_INLAND ocean_proximity_NEAR_BAY ocean_proximity_NEAR_OCEAN
## Min. :0.000 Min. :0.00000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.000 Median :0.00000 Median :0.000
## Mean :0.345 Mean :0.08068 Mean :0.125
## 3rd Qu.:1.000 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :1.000 Max. :1.00000 Max. :1.000
## rooms_per_household bedrooms_per_room population_per_household
## Min. : 1.000 Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.382 1st Qu.: 2.195 1st Qu.: 1.020
## Median : 1.465 Median : 2.611 Median : 1.024
## Mean : 1.484 Mean : 2.769 Mean : 1.027
## 3rd Qu.: 1.548 3rd Qu.: 3.179 3rd Qu.: 1.030
## Max. :15.000 Max. :15.000 Max. :15.000
## log_total_rooms log_total_bedrooms log_population log_households
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.4186 1st Qu.:0.5006 1st Qu.:0.2769 1st Qu.:0.5018
## Median :0.5685 Median :0.6721 Median :0.3880 Median :0.6707
## Mean :0.6163 Mean :0.7218 Mean :0.4270 Mean :0.7153
## 3rd Qu.:0.7603 3rd Qu.:0.8887 3rd Qu.:0.5306 3rd Qu.:0.8812
## Max. :2.7081 Max. :2.7081 Max. :2.7081 Max. :2.7081
## log_median_income log_rooms_per_household log_bedrooms_per_room
## Min. :-0.6933 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.9333 1st Qu.:0.3233 1st Qu.:0.7864
## Median : 1.2476 Median :0.3819 Median :0.9596
## Mean : 1.2174 Mean :0.3868 Mean :0.9774
## 3rd Qu.: 1.5315 3rd Qu.:0.4369 3rd Qu.:1.1566
## Max. : 2.5762 Max. :2.7081 Max. :2.7081
## log_population_per_household
## Min. :0.00000
## 1st Qu.:0.01986
## Median :0.02413
## Mean :0.02581
## 3rd Qu.:0.02921
## Max. :2.70805
corrmatrix = cor(sdf)
t(corrmatrix)
## housing_median_age total_rooms total_bedrooms
## housing_median_age 1.00000000 -0.3747630026 -0.331309723
## total_rooms -0.37476300 1.0000000000 0.935422796
## total_bedrooms -0.33130972 0.9354227956 1.000000000
## population -0.27718377 0.8602300574 0.880212004
## households -0.31045185 0.9224226021 0.978746198
## median_income -0.19793210 0.2227087874 0.020802496
## median_house_value 0.01381800 0.1532913001 0.079970469
## ocean_proximity_1H_OCEAN 0.11762475 -0.0162807691 0.012381030
## ocean_proximity_INLAND -0.21969804 0.0263535919 -0.013435002
## ocean_proximity_NEAR_BAY 0.12337611 0.0003676967 -0.003761993
## ocean_proximity_NEAR_OCEAN 0.03724211 -0.0134742604 0.003929836
## rooms_per_household -0.17876200 0.1397245138 0.013422080
## bedrooms_per_room 0.14426166 -0.1875404824 0.076759152
## population_per_household 0.02155349 -0.0241525895 -0.027629035
## log_total_rooms -0.39347049 0.9419308631 0.900431077
## log_total_bedrooms -0.33580108 0.8708651621 0.946099399
## log_population -0.28268819 0.8254358961 0.861862266
## log_households -0.30696984 0.8565500468 0.922279441
## log_median_income -0.18403576 0.2322828171 0.042326951
## log_rooms_per_household -0.23622198 0.1979466718 0.008893265
## log_bedrooms_per_room 0.17686465 -0.2005556579 0.078948657
## log_population_per_household 0.03483904 -0.0522278774 -0.063176207
## population households median_income
## housing_median_age -0.27718377 -0.310451853 -0.197932095
## total_rooms 0.86023006 0.922422602 0.222708787
## total_bedrooms 0.88021200 0.978746198 0.020802496
## population 1.00000000 0.910682455 0.040159411
## households 0.91068245 1.000000000 0.045468585
## median_income 0.04015941 0.045468585 1.000000000
## median_house_value 0.02227064 0.099489960 0.665774849
## ocean_proximity_1H_OCEAN 0.06951887 0.038132019 0.184800134
## ocean_proximity_INLAND -0.03799835 -0.047228557 -0.228050323
## ocean_proximity_NEAR_BAY -0.03433719 0.005665539 0.067867661
## ocean_proximity_NEAR_OCEAN -0.02140617 0.006090457 -0.005749940
## rooms_per_household -0.07445637 -0.078929107 0.311129787
## bedrooms_per_room 0.03409933 0.058160385 -0.634558982
## population_per_household 0.06532818 -0.026534027 0.025866833
## log_total_rooms 0.81323363 0.892859889 0.250995266
## log_total_bedrooms 0.83073874 0.930785486 0.010404776
## log_population 0.95460000 0.894514840 0.036226741
## log_households 0.85452266 0.946890218 0.042210551
## log_median_income 0.05389188 0.067472361 0.958447829
## log_rooms_per_household -0.07534127 -0.082167389 0.465248251
## log_bedrooms_per_room 0.02771288 0.057217926 -0.723887316
## log_population_per_household 0.10432185 -0.057916990 0.007957251
## median_house_value ocean_proximity_1H_OCEAN
## housing_median_age 0.01381800 0.117624750
## total_rooms 0.15329130 -0.016280769
## total_bedrooms 0.07997047 0.012381030
## population 0.02227064 0.069518865
## households 0.09948996 0.038132019
## median_income 0.66577485 0.184800134
## median_house_value 1.00000000 0.321038511
## ocean_proximity_1H_OCEAN 0.32103851 1.000000000
## ocean_proximity_INLAND -0.50301677 -0.655412478
## ocean_proximity_NEAR_BAY 0.11481952 -0.267518416
## ocean_proximity_NEAR_OCEAN 0.14500129 -0.341321517
## rooms_per_household 0.11363592 -0.126176671
## bedrooms_per_room -0.23391359 0.104289188
## population_per_household -0.01991006 -0.002092085
## log_total_rooms 0.18386617 -0.010962608
## log_total_bedrooms 0.09091698 0.021539760
## log_population 0.02471486 0.090670654
## log_households 0.11432395 0.053704989
## log_median_income 0.62987265 0.185238287
## log_rooms_per_household 0.17234770 -0.158836967
## log_bedrooms_per_room -0.30481258 0.084508064
## log_population_per_household -0.06682306 0.023595154
## ocean_proximity_INLAND ocean_proximity_NEAR_BAY
## housing_median_age -0.219698042 0.1233761099
## total_rooms 0.026353592 0.0003676967
## total_bedrooms -0.013435002 -0.0037619932
## population -0.037998355 -0.0343371915
## households -0.047228557 0.0056655389
## median_income -0.228050323 0.0678676612
## median_house_value -0.503016774 0.1148195192
## ocean_proximity_1H_OCEAN -0.655412478 -0.2675184160
## ocean_proximity_INLAND 1.000000000 -0.2150158524
## ocean_proximity_NEAR_BAY -0.215015852 1.0000000000
## ocean_proximity_NEAR_OCEAN -0.274334523 -0.1119745800
## rooms_per_household 0.180850360 -0.0201352195
## bedrooms_per_room -0.142227880 -0.0210359696
## population_per_household 0.011334223 -0.0120023879
## log_total_rooms 0.014764616 0.0018609286
## log_total_bedrooms -0.032381997 -0.0046413556
## log_population -0.058401005 -0.0390712783
## log_households -0.073495620 0.0057757582
## log_median_income -0.232327460 0.0633516305
## log_rooms_per_household 0.223796236 -0.0179560078
## log_bedrooms_per_room -0.123799997 -0.0226607031
## log_population_per_household 0.009430231 -0.0362119402
## ocean_proximity_NEAR_OCEAN rooms_per_household
## housing_median_age 0.037242109 -0.178761998
## total_rooms -0.013474260 0.139724514
## total_bedrooms 0.003929836 0.013422080
## population -0.021406165 -0.074456368
## households 0.006090457 -0.078929107
## median_income -0.005749940 0.311129787
## median_house_value 0.145001289 0.113635921
## ocean_proximity_1H_OCEAN -0.341321517 -0.126176671
## ocean_proximity_INLAND -0.274334523 0.180850360
## ocean_proximity_NEAR_BAY -0.111974580 -0.020135220
## ocean_proximity_NEAR_OCEAN 1.000000000 -0.053486583
## rooms_per_household -0.053486583 1.000000000
## bedrooms_per_room 0.064356659 -0.415270347
## population_per_household -0.003237739 -0.007216777
## log_total_rooms -0.005971760 0.151481550
## log_total_bedrooms 0.018127233 0.003540664
## log_population -0.019922289 -0.109742072
## log_households 0.020386282 -0.118986508
## log_median_income 0.003446060 0.295219340
## log_rooms_per_household -0.067851181 0.919151674
## log_bedrooms_per_room 0.068966766 -0.436854929
## log_population_per_household -0.019142021 -0.018714076
## bedrooms_per_room population_per_household
## housing_median_age 0.144261656 0.021553493
## total_rooms -0.187540482 -0.024152590
## total_bedrooms 0.076759152 -0.027629035
## population 0.034099333 0.065328181
## households 0.058160385 -0.026534027
## median_income -0.634558982 0.025866833
## median_house_value -0.233913588 -0.019910059
## ocean_proximity_1H_OCEAN 0.104289188 -0.002092085
## ocean_proximity_INLAND -0.142227880 0.011334223
## ocean_proximity_NEAR_BAY -0.021035970 -0.012002388
## ocean_proximity_NEAR_OCEAN 0.064356659 -0.003237739
## rooms_per_household -0.415270347 -0.007216777
## bedrooms_per_room 1.000000000 0.004360359
## population_per_household 0.004360359 1.000000000
## log_total_rooms -0.222571415 -0.037566024
## log_total_bedrooms 0.090352054 -0.040941451
## log_population 0.046275437 0.058523351
## log_households 0.064692998 -0.039394648
## log_median_income -0.648918905 0.014587401
## log_rooms_per_household -0.634509189 -0.009666021
## log_bedrooms_per_room 0.966803048 0.003411033
## log_population_per_household 0.006328297 0.934938587
## log_total_rooms log_total_bedrooms log_population
## housing_median_age -0.393470487 -0.335801084 -0.28268819
## total_rooms 0.941930863 0.870865162 0.82543590
## total_bedrooms 0.900431077 0.946099399 0.86186227
## population 0.813233625 0.830738742 0.95460000
## households 0.892859889 0.930785486 0.89451484
## median_income 0.250995266 0.010404776 0.03622674
## median_house_value 0.183866169 0.090916985 0.02471486
## ocean_proximity_1H_OCEAN -0.010962608 0.021539760 0.09067065
## ocean_proximity_INLAND 0.014764616 -0.032381997 -0.05840100
## ocean_proximity_NEAR_BAY 0.001860929 -0.004641356 -0.03907128
## ocean_proximity_NEAR_OCEAN -0.005971760 0.018127233 -0.01992229
## rooms_per_household 0.151481550 0.003540664 -0.10974207
## bedrooms_per_room -0.222571415 0.090352054 0.04627544
## population_per_household -0.037566024 -0.040941451 0.05852335
## log_total_rooms 1.000000000 0.937263502 0.85690868
## log_total_bedrooms 0.937263502 1.000000000 0.88865109
## log_population 0.856908682 0.888651089 1.00000000
## log_households 0.925040691 0.977514499 0.91828515
## log_median_income 0.270005509 0.038784076 0.05517852
## log_rooms_per_household 0.216975238 -0.005331079 -0.11229390
## log_bedrooms_per_room -0.231886414 0.097919865 0.04262742
## log_population_per_household -0.076519419 -0.084229164 0.10149631
## log_households log_median_income
## housing_median_age -0.306969839 -0.18403576
## total_rooms 0.856550047 0.23228282
## total_bedrooms 0.922279441 0.04232695
## population 0.854522659 0.05389188
## households 0.946890218 0.06747236
## median_income 0.042210551 0.95844783
## median_house_value 0.114323955 0.62987265
## ocean_proximity_1H_OCEAN 0.053704989 0.18523829
## ocean_proximity_INLAND -0.073495620 -0.23232746
## ocean_proximity_NEAR_BAY 0.005775758 0.06335163
## ocean_proximity_NEAR_OCEAN 0.020386282 0.00344606
## rooms_per_household -0.118986508 0.29521934
## bedrooms_per_room 0.064692998 -0.64891891
## population_per_household -0.039394648 0.01458740
## log_total_rooms 0.925040691 0.27000551
## log_total_bedrooms 0.977514499 0.03878408
## log_population 0.918285150 0.05517852
## log_households 1.000000000 0.07118328
## log_median_income 0.071183285 1.00000000
## log_rooms_per_household -0.121511336 0.44635698
## log_bedrooms_per_room 0.068143022 -0.71121101
## log_population_per_household -0.077586345 -0.00455519
## log_rooms_per_household log_bedrooms_per_room
## housing_median_age -0.236221982 0.176864653
## total_rooms 0.197946672 -0.200555658
## total_bedrooms 0.008893265 0.078948657
## population -0.075341266 0.027712883
## households -0.082167389 0.057217926
## median_income 0.465248251 -0.723887316
## median_house_value 0.172347704 -0.304812575
## ocean_proximity_1H_OCEAN -0.158836967 0.084508064
## ocean_proximity_INLAND 0.223796236 -0.123799997
## ocean_proximity_NEAR_BAY -0.017956008 -0.022660703
## ocean_proximity_NEAR_OCEAN -0.067851181 0.068966766
## rooms_per_household 0.919151674 -0.436854929
## bedrooms_per_room -0.634509189 0.966803048
## population_per_household -0.009666021 0.003411033
## log_total_rooms 0.216975238 -0.231886414
## log_total_bedrooms -0.005331079 0.097919865
## log_population -0.112293897 0.042627419
## log_households -0.121511336 0.068143022
## log_median_income 0.446356981 -0.711211014
## log_rooms_per_household 1.000000000 -0.660985996
## log_bedrooms_per_room -0.660985996 1.000000000
## log_population_per_household -0.021890595 0.001529054
## log_population_per_household
## housing_median_age 0.034839036
## total_rooms -0.052227877
## total_bedrooms -0.063176207
## population 0.104321849
## households -0.057916990
## median_income 0.007957251
## median_house_value -0.066823064
## ocean_proximity_1H_OCEAN 0.023595154
## ocean_proximity_INLAND 0.009430231
## ocean_proximity_NEAR_BAY -0.036211940
## ocean_proximity_NEAR_OCEAN -0.019142021
## rooms_per_household -0.018714076
## bedrooms_per_room 0.006328297
## population_per_household 0.934938587
## log_total_rooms -0.076519419
## log_total_bedrooms -0.084229164
## log_population 0.101496308
## log_households -0.077586345
## log_median_income -0.004555190
## log_rooms_per_household -0.021890595
## log_bedrooms_per_room 0.001529054
## log_population_per_household 1.000000000
corrmatrix = cor(sdf2)
t(corrmatrix)
## housing_median_age total_rooms total_bedrooms
## housing_median_age 1.00000000 -0.3747630026 -0.331309723
## total_rooms -0.37476300 1.0000000000 0.935422796
## total_bedrooms -0.33130972 0.9354227956 1.000000000
## population -0.27718377 0.8602300574 0.880212004
## households -0.31045185 0.9224226021 0.978746198
## median_income -0.19793210 0.2227087874 0.020802496
## ocean_proximity_1H_OCEAN 0.11762475 -0.0162807691 0.012381030
## ocean_proximity_INLAND -0.21969804 0.0263535919 -0.013435002
## ocean_proximity_NEAR_BAY 0.12337611 0.0003676967 -0.003761993
## ocean_proximity_NEAR_OCEAN 0.03724211 -0.0134742604 0.003929836
## rooms_per_household -0.17876200 0.1397245138 0.013422080
## bedrooms_per_room 0.14426166 -0.1875404824 0.076759152
## population_per_household 0.02155349 -0.0241525895 -0.027629035
## log_total_rooms -0.39347049 0.9419308631 0.900431077
## log_total_bedrooms -0.33580108 0.8708651621 0.946099399
## log_population -0.28268819 0.8254358961 0.861862266
## log_households -0.30696984 0.8565500468 0.922279441
## log_median_income -0.18403576 0.2322828171 0.042326951
## log_rooms_per_household -0.23622198 0.1979466718 0.008893265
## log_median_house_value -0.23622198 0.1979466718 0.008893265
## log_bedrooms_per_room 0.17686465 -0.2005556579 0.078948657
## log_population_per_household 0.03483904 -0.0522278774 -0.063176207
## population households median_income
## housing_median_age -0.27718377 -0.310451853 -0.197932095
## total_rooms 0.86023006 0.922422602 0.222708787
## total_bedrooms 0.88021200 0.978746198 0.020802496
## population 1.00000000 0.910682455 0.040159411
## households 0.91068245 1.000000000 0.045468585
## median_income 0.04015941 0.045468585 1.000000000
## ocean_proximity_1H_OCEAN 0.06951887 0.038132019 0.184800134
## ocean_proximity_INLAND -0.03799835 -0.047228557 -0.228050323
## ocean_proximity_NEAR_BAY -0.03433719 0.005665539 0.067867661
## ocean_proximity_NEAR_OCEAN -0.02140617 0.006090457 -0.005749940
## rooms_per_household -0.07445637 -0.078929107 0.311129787
## bedrooms_per_room 0.03409933 0.058160385 -0.634558982
## population_per_household 0.06532818 -0.026534027 0.025866833
## log_total_rooms 0.81323363 0.892859889 0.250995266
## log_total_bedrooms 0.83073874 0.930785486 0.010404776
## log_population 0.95460000 0.894514840 0.036226741
## log_households 0.85452266 0.946890218 0.042210551
## log_median_income 0.05389188 0.067472361 0.958447829
## log_rooms_per_household -0.07534127 -0.082167389 0.465248251
## log_median_house_value -0.07534127 -0.082167389 0.465248251
## log_bedrooms_per_room 0.02771288 0.057217926 -0.723887316
## log_population_per_household 0.10432185 -0.057916990 0.007957251
## ocean_proximity_1H_OCEAN ocean_proximity_INLAND
## housing_median_age 0.117624750 -0.219698042
## total_rooms -0.016280769 0.026353592
## total_bedrooms 0.012381030 -0.013435002
## population 0.069518865 -0.037998355
## households 0.038132019 -0.047228557
## median_income 0.184800134 -0.228050323
## ocean_proximity_1H_OCEAN 1.000000000 -0.655412478
## ocean_proximity_INLAND -0.655412478 1.000000000
## ocean_proximity_NEAR_BAY -0.267518416 -0.215015852
## ocean_proximity_NEAR_OCEAN -0.341321517 -0.274334523
## rooms_per_household -0.126176671 0.180850360
## bedrooms_per_room 0.104289188 -0.142227880
## population_per_household -0.002092085 0.011334223
## log_total_rooms -0.010962608 0.014764616
## log_total_bedrooms 0.021539760 -0.032381997
## log_population 0.090670654 -0.058401005
## log_households 0.053704989 -0.073495620
## log_median_income 0.185238287 -0.232327460
## log_rooms_per_household -0.158836967 0.223796236
## log_median_house_value -0.158836967 0.223796236
## log_bedrooms_per_room 0.084508064 -0.123799997
## log_population_per_household 0.023595154 0.009430231
## ocean_proximity_NEAR_BAY
## housing_median_age 0.1233761099
## total_rooms 0.0003676967
## total_bedrooms -0.0037619932
## population -0.0343371915
## households 0.0056655389
## median_income 0.0678676612
## ocean_proximity_1H_OCEAN -0.2675184160
## ocean_proximity_INLAND -0.2150158524
## ocean_proximity_NEAR_BAY 1.0000000000
## ocean_proximity_NEAR_OCEAN -0.1119745800
## rooms_per_household -0.0201352195
## bedrooms_per_room -0.0210359696
## population_per_household -0.0120023879
## log_total_rooms 0.0018609286
## log_total_bedrooms -0.0046413556
## log_population -0.0390712783
## log_households 0.0057757582
## log_median_income 0.0633516305
## log_rooms_per_household -0.0179560078
## log_median_house_value -0.0179560078
## log_bedrooms_per_room -0.0226607031
## log_population_per_household -0.0362119402
## ocean_proximity_NEAR_OCEAN rooms_per_household
## housing_median_age 0.037242109 -0.178761998
## total_rooms -0.013474260 0.139724514
## total_bedrooms 0.003929836 0.013422080
## population -0.021406165 -0.074456368
## households 0.006090457 -0.078929107
## median_income -0.005749940 0.311129787
## ocean_proximity_1H_OCEAN -0.341321517 -0.126176671
## ocean_proximity_INLAND -0.274334523 0.180850360
## ocean_proximity_NEAR_BAY -0.111974580 -0.020135220
## ocean_proximity_NEAR_OCEAN 1.000000000 -0.053486583
## rooms_per_household -0.053486583 1.000000000
## bedrooms_per_room 0.064356659 -0.415270347
## population_per_household -0.003237739 -0.007216777
## log_total_rooms -0.005971760 0.151481550
## log_total_bedrooms 0.018127233 0.003540664
## log_population -0.019922289 -0.109742072
## log_households 0.020386282 -0.118986508
## log_median_income 0.003446060 0.295219340
## log_rooms_per_household -0.067851181 0.919151674
## log_median_house_value -0.067851181 0.919151674
## log_bedrooms_per_room 0.068966766 -0.436854929
## log_population_per_household -0.019142021 -0.018714076
## bedrooms_per_room population_per_household
## housing_median_age 0.144261656 0.021553493
## total_rooms -0.187540482 -0.024152590
## total_bedrooms 0.076759152 -0.027629035
## population 0.034099333 0.065328181
## households 0.058160385 -0.026534027
## median_income -0.634558982 0.025866833
## ocean_proximity_1H_OCEAN 0.104289188 -0.002092085
## ocean_proximity_INLAND -0.142227880 0.011334223
## ocean_proximity_NEAR_BAY -0.021035970 -0.012002388
## ocean_proximity_NEAR_OCEAN 0.064356659 -0.003237739
## rooms_per_household -0.415270347 -0.007216777
## bedrooms_per_room 1.000000000 0.004360359
## population_per_household 0.004360359 1.000000000
## log_total_rooms -0.222571415 -0.037566024
## log_total_bedrooms 0.090352054 -0.040941451
## log_population 0.046275437 0.058523351
## log_households 0.064692998 -0.039394648
## log_median_income -0.648918905 0.014587401
## log_rooms_per_household -0.634509189 -0.009666021
## log_median_house_value -0.634509189 -0.009666021
## log_bedrooms_per_room 0.966803048 0.003411033
## log_population_per_household 0.006328297 0.934938587
## log_total_rooms log_total_bedrooms log_population
## housing_median_age -0.393470487 -0.335801084 -0.28268819
## total_rooms 0.941930863 0.870865162 0.82543590
## total_bedrooms 0.900431077 0.946099399 0.86186227
## population 0.813233625 0.830738742 0.95460000
## households 0.892859889 0.930785486 0.89451484
## median_income 0.250995266 0.010404776 0.03622674
## ocean_proximity_1H_OCEAN -0.010962608 0.021539760 0.09067065
## ocean_proximity_INLAND 0.014764616 -0.032381997 -0.05840100
## ocean_proximity_NEAR_BAY 0.001860929 -0.004641356 -0.03907128
## ocean_proximity_NEAR_OCEAN -0.005971760 0.018127233 -0.01992229
## rooms_per_household 0.151481550 0.003540664 -0.10974207
## bedrooms_per_room -0.222571415 0.090352054 0.04627544
## population_per_household -0.037566024 -0.040941451 0.05852335
## log_total_rooms 1.000000000 0.937263502 0.85690868
## log_total_bedrooms 0.937263502 1.000000000 0.88865109
## log_population 0.856908682 0.888651089 1.00000000
## log_households 0.925040691 0.977514499 0.91828515
## log_median_income 0.270005509 0.038784076 0.05517852
## log_rooms_per_household 0.216975238 -0.005331079 -0.11229390
## log_median_house_value 0.216975238 -0.005331079 -0.11229390
## log_bedrooms_per_room -0.231886414 0.097919865 0.04262742
## log_population_per_household -0.076519419 -0.084229164 0.10149631
## log_households log_median_income
## housing_median_age -0.306969839 -0.18403576
## total_rooms 0.856550047 0.23228282
## total_bedrooms 0.922279441 0.04232695
## population 0.854522659 0.05389188
## households 0.946890218 0.06747236
## median_income 0.042210551 0.95844783
## ocean_proximity_1H_OCEAN 0.053704989 0.18523829
## ocean_proximity_INLAND -0.073495620 -0.23232746
## ocean_proximity_NEAR_BAY 0.005775758 0.06335163
## ocean_proximity_NEAR_OCEAN 0.020386282 0.00344606
## rooms_per_household -0.118986508 0.29521934
## bedrooms_per_room 0.064692998 -0.64891891
## population_per_household -0.039394648 0.01458740
## log_total_rooms 0.925040691 0.27000551
## log_total_bedrooms 0.977514499 0.03878408
## log_population 0.918285150 0.05517852
## log_households 1.000000000 0.07118328
## log_median_income 0.071183285 1.00000000
## log_rooms_per_household -0.121511336 0.44635698
## log_median_house_value -0.121511336 0.44635698
## log_bedrooms_per_room 0.068143022 -0.71121101
## log_population_per_household -0.077586345 -0.00455519
## log_rooms_per_household log_median_house_value
## housing_median_age -0.236221982 -0.236221982
## total_rooms 0.197946672 0.197946672
## total_bedrooms 0.008893265 0.008893265
## population -0.075341266 -0.075341266
## households -0.082167389 -0.082167389
## median_income 0.465248251 0.465248251
## ocean_proximity_1H_OCEAN -0.158836967 -0.158836967
## ocean_proximity_INLAND 0.223796236 0.223796236
## ocean_proximity_NEAR_BAY -0.017956008 -0.017956008
## ocean_proximity_NEAR_OCEAN -0.067851181 -0.067851181
## rooms_per_household 0.919151674 0.919151674
## bedrooms_per_room -0.634509189 -0.634509189
## population_per_household -0.009666021 -0.009666021
## log_total_rooms 0.216975238 0.216975238
## log_total_bedrooms -0.005331079 -0.005331079
## log_population -0.112293897 -0.112293897
## log_households -0.121511336 -0.121511336
## log_median_income 0.446356981 0.446356981
## log_rooms_per_household 1.000000000 1.000000000
## log_median_house_value 1.000000000 1.000000000
## log_bedrooms_per_room -0.660985996 -0.660985996
## log_population_per_household -0.021890595 -0.021890595
## log_bedrooms_per_room log_population_per_household
## housing_median_age 0.176864653 0.034839036
## total_rooms -0.200555658 -0.052227877
## total_bedrooms 0.078948657 -0.063176207
## population 0.027712883 0.104321849
## households 0.057217926 -0.057916990
## median_income -0.723887316 0.007957251
## ocean_proximity_1H_OCEAN 0.084508064 0.023595154
## ocean_proximity_INLAND -0.123799997 0.009430231
## ocean_proximity_NEAR_BAY -0.022660703 -0.036211940
## ocean_proximity_NEAR_OCEAN 0.068966766 -0.019142021
## rooms_per_household -0.436854929 -0.018714076
## bedrooms_per_room 0.966803048 0.006328297
## population_per_household 0.003411033 0.934938587
## log_total_rooms -0.231886414 -0.076519419
## log_total_bedrooms 0.097919865 -0.084229164
## log_population 0.042627419 0.101496308
## log_households 0.068143022 -0.077586345
## log_median_income -0.711211014 -0.004555190
## log_rooms_per_household -0.660985996 -0.021890595
## log_median_house_value -0.660985996 -0.021890595
## log_bedrooms_per_room 1.000000000 0.001529054
## log_population_per_household 0.001529054 1.000000000
m0<- lm(df$median_house_value ~ . , df)
m1 <- lm(df$median_house_value ~ (.)^2 ,df)
m2<- lm(cdf$median_house_value ~ . , cdf)
m3 <- lm(cdf2$log_median_house_value ~ . , cdf2)
m4<- lm(sdf$median_house_value ~ . , sdf)
m5 <- lm(sdf2$log_median_house_value ~ . , sdf2)
summary(m0)
##
## Call:
## lm(formula = df$median_house_value ~ ., data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -556980 -42683 -10497 28765 779052
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.266e+06 8.837e+04 -25.640 < 2e-16 ***
## longitude -2.681e+04 1.020e+03 -26.296 < 2e-16 ***
## latitude -2.548e+04 1.005e+03 -25.363 < 2e-16 ***
## housing_median_age 1.073e+03 4.389e+01 24.439 < 2e-16 ***
## total_rooms -6.193e+00 7.915e-01 -7.825 5.32e-15 ***
## total_bedrooms 1.006e+02 6.869e+00 14.640 < 2e-16 ***
## population -3.797e+01 1.076e+00 -35.282 < 2e-16 ***
## households 4.962e+01 7.451e+00 6.659 2.83e-11 ***
## median_income 3.926e+04 3.380e+02 116.151 < 2e-16 ***
## ocean_proximity_1H_OCEAN -4.278e+03 1.570e+03 -2.726 0.006421 **
## ocean_proximity_INLAND -4.356e+04 2.250e+03 -19.363 < 2e-16 ***
## ocean_proximity_ISLAND 1.486e+05 3.075e+04 4.833 1.36e-06 ***
## ocean_proximity_NEAR_BAY -8.232e+03 2.176e+03 -3.784 0.000155 ***
## ocean_proximity_NEAR_OCEAN NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 68660 on 20420 degrees of freedom
## Multiple R-squared: 0.6465, Adjusted R-squared: 0.6463
## F-statistic: 3112 on 12 and 20420 DF, p-value: < 2.2e-16
summary(m1)
##
## Call:
## lm(formula = df$median_house_value ~ (.)^2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -481870 -36549 -8853 25588 611554
##
## Coefficients: (23 not defined because of singularities)
## Estimate Std. Error
## (Intercept) -7.454e+06 8.751e+05
## longitude -5.244e+04 7.594e+03
## latitude 2.932e+05 2.875e+04
## housing_median_age -7.782e+04 7.771e+03
## total_rooms 9.507e+02 1.775e+02
## total_bedrooms -3.919e+03 1.113e+03
## population 3.584e+02 1.768e+02
## households -1.856e+03 1.067e+03
## median_income -7.990e+05 6.753e+04
## ocean_proximity_1H_OCEAN 6.833e+05 3.123e+05
## ocean_proximity_INLAND 1.228e+06 2.955e+05
## ocean_proximity_ISLAND 7.949e+08 3.272e+08
## ocean_proximity_NEAR_BAY -2.363e+07 1.268e+06
## ocean_proximity_NEAR_OCEAN NA NA
## longitude:latitude 2.149e+03 2.177e+02
## longitude:housing_median_age -9.823e+02 8.939e+01
## longitude:total_rooms 1.208e+01 2.089e+00
## longitude:total_bedrooms -5.506e+01 1.292e+01
## longitude:population 5.376e+00 2.081e+00
## longitude:households -2.007e+01 1.192e+01
## longitude:median_income -1.022e+04 7.959e+02
## longitude:ocean_proximity_1H_OCEAN 5.504e+03 3.725e+03
## longitude:ocean_proximity_INLAND 1.462e+04 3.437e+03
## longitude:ocean_proximity_ISLAND 1.448e+07 6.581e+06
## longitude:ocean_proximity_NEAR_BAY -2.338e+05 1.130e+04
## longitude:ocean_proximity_NEAR_OCEAN NA NA
## latitude:housing_median_age -1.125e+03 8.773e+01
## latitude:total_rooms 1.387e+01 2.146e+00
## latitude:total_bedrooms -7.087e+01 1.312e+01
## latitude:population 7.881e+00 2.212e+00
## latitude:households -2.126e+01 1.117e+01
## latitude:median_income -1.116e+04 8.235e+02
## latitude:ocean_proximity_1H_OCEAN -2.669e+02 3.887e+03
## latitude:ocean_proximity_INLAND 1.178e+04 3.419e+03
## latitude:ocean_proximity_ISLAND 2.755e+07 1.368e+07
## latitude:ocean_proximity_NEAR_BAY -1.310e+05 8.775e+03
## latitude:ocean_proximity_NEAR_OCEAN NA NA
## housing_median_age:total_rooms -3.606e-01 7.281e-02
## housing_median_age:total_bedrooms 2.030e+00 7.375e-01
## housing_median_age:population -1.483e+00 9.685e-02
## housing_median_age:households 5.172e+00 8.225e-01
## housing_median_age:median_income 1.336e+02 2.467e+01
## housing_median_age:ocean_proximity_1H_OCEAN -2.128e+02 1.340e+02
## housing_median_age:ocean_proximity_INLAND 7.225e+02 1.883e+02
## housing_median_age:ocean_proximity_ISLAND 1.094e+04 8.196e+03
## housing_median_age:ocean_proximity_NEAR_BAY -3.630e+02 1.817e+02
## housing_median_age:ocean_proximity_NEAR_OCEAN NA NA
## total_rooms:total_bedrooms -1.748e-03 1.575e-03
## total_rooms:population -4.932e-03 6.780e-04
## total_rooms:households 1.414e-02 2.379e-03
## total_rooms:median_income 1.300e+00 3.065e-01
## total_rooms:ocean_proximity_1H_OCEAN -3.189e+00 2.527e+00
## total_rooms:ocean_proximity_INLAND -1.975e+01 4.145e+00
## total_rooms:ocean_proximity_ISLAND 5.826e+01 9.104e+01
## total_rooms:ocean_proximity_NEAR_BAY 8.586e+00 3.937e+00
## total_rooms:ocean_proximity_NEAR_OCEAN NA NA
## total_bedrooms:population 2.138e-02 5.946e-03
## total_bedrooms:households -9.208e-02 8.707e-03
## total_bedrooms:median_income 5.287e+00 4.013e+00
## total_bedrooms:ocean_proximity_1H_OCEAN -2.270e+01 2.911e+01
## total_bedrooms:ocean_proximity_INLAND -7.214e+00 3.510e+01
## total_bedrooms:ocean_proximity_ISLAND NA NA
## total_bedrooms:ocean_proximity_NEAR_BAY -7.650e+01 4.786e+01
## total_bedrooms:ocean_proximity_NEAR_OCEAN NA NA
## population:households 1.521e-02 5.303e-03
## population:median_income -2.170e+00 7.432e-01
## population:ocean_proximity_1H_OCEAN -1.754e+01 3.360e+00
## population:ocean_proximity_INLAND 9.572e+00 4.998e+00
## population:ocean_proximity_ISLAND NA NA
## population:ocean_proximity_NEAR_BAY -2.589e+01 5.796e+00
## population:ocean_proximity_NEAR_OCEAN NA NA
## households:median_income 8.180e+00 4.786e+00
## households:ocean_proximity_1H_OCEAN 8.244e+01 3.265e+01
## households:ocean_proximity_INLAND 9.368e+01 3.654e+01
## households:ocean_proximity_ISLAND NA NA
## households:ocean_proximity_NEAR_BAY 9.399e+01 5.387e+01
## households:ocean_proximity_NEAR_OCEAN NA NA
## median_income:ocean_proximity_1H_OCEAN -2.129e+03 9.682e+02
## median_income:ocean_proximity_INLAND 8.789e+03 1.584e+03
## median_income:ocean_proximity_ISLAND NA NA
## median_income:ocean_proximity_NEAR_BAY -1.353e+03 1.340e+03
## median_income:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_INLAND NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_ISLAND NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_NEAR_BAY NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_INLAND:ocean_proximity_ISLAND NA NA
## ocean_proximity_INLAND:ocean_proximity_NEAR_BAY NA NA
## ocean_proximity_INLAND:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_ISLAND:ocean_proximity_NEAR_BAY NA NA
## ocean_proximity_ISLAND:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_NEAR_BAY:ocean_proximity_NEAR_OCEAN NA NA
## t value Pr(>|t|)
## (Intercept) -8.518 < 2e-16 ***
## longitude -6.906 5.13e-12 ***
## latitude 10.198 < 2e-16 ***
## housing_median_age -10.015 < 2e-16 ***
## total_rooms 5.355 8.63e-08 ***
## total_bedrooms -3.522 0.000429 ***
## population 2.027 0.042646 *
## households -1.740 0.081887 .
## median_income -11.832 < 2e-16 ***
## ocean_proximity_1H_OCEAN 2.188 0.028690 *
## ocean_proximity_INLAND 4.155 3.26e-05 ***
## ocean_proximity_ISLAND 2.429 0.015141 *
## ocean_proximity_NEAR_BAY -18.629 < 2e-16 ***
## ocean_proximity_NEAR_OCEAN NA NA
## longitude:latitude 9.873 < 2e-16 ***
## longitude:housing_median_age -10.989 < 2e-16 ***
## longitude:total_rooms 5.780 7.59e-09 ***
## longitude:total_bedrooms -4.263 2.03e-05 ***
## longitude:population 2.583 0.009793 **
## longitude:households -1.684 0.092144 .
## longitude:median_income -12.846 < 2e-16 ***
## longitude:ocean_proximity_1H_OCEAN 1.478 0.139528
## longitude:ocean_proximity_INLAND 4.254 2.11e-05 ***
## longitude:ocean_proximity_ISLAND 2.201 0.027755 *
## longitude:ocean_proximity_NEAR_BAY -20.685 < 2e-16 ***
## longitude:ocean_proximity_NEAR_OCEAN NA NA
## latitude:housing_median_age -12.820 < 2e-16 ***
## latitude:total_rooms 6.461 1.06e-10 ***
## latitude:total_bedrooms -5.403 6.63e-08 ***
## latitude:population 3.564 0.000367 ***
## latitude:households -1.904 0.056912 .
## latitude:median_income -13.548 < 2e-16 ***
## latitude:ocean_proximity_1H_OCEAN -0.069 0.945267
## latitude:ocean_proximity_INLAND 3.445 0.000572 ***
## latitude:ocean_proximity_ISLAND 2.014 0.044046 *
## latitude:ocean_proximity_NEAR_BAY -14.926 < 2e-16 ***
## latitude:ocean_proximity_NEAR_OCEAN NA NA
## housing_median_age:total_rooms -4.952 7.41e-07 ***
## housing_median_age:total_bedrooms 2.752 0.005925 **
## housing_median_age:population -15.317 < 2e-16 ***
## housing_median_age:households 6.289 3.27e-10 ***
## housing_median_age:median_income 5.415 6.21e-08 ***
## housing_median_age:ocean_proximity_1H_OCEAN -1.588 0.112312
## housing_median_age:ocean_proximity_INLAND 3.837 0.000125 ***
## housing_median_age:ocean_proximity_ISLAND 1.335 0.182033
## housing_median_age:ocean_proximity_NEAR_BAY -1.998 0.045709 *
## housing_median_age:ocean_proximity_NEAR_OCEAN NA NA
## total_rooms:total_bedrooms -1.110 0.267014
## total_rooms:population -7.273 3.64e-13 ***
## total_rooms:households 5.943 2.85e-09 ***
## total_rooms:median_income 4.242 2.22e-05 ***
## total_rooms:ocean_proximity_1H_OCEAN -1.262 0.206949
## total_rooms:ocean_proximity_INLAND -4.766 1.89e-06 ***
## total_rooms:ocean_proximity_ISLAND 0.640 0.522242
## total_rooms:ocean_proximity_NEAR_BAY 2.181 0.029208 *
## total_rooms:ocean_proximity_NEAR_OCEAN NA NA
## total_bedrooms:population 3.595 0.000325 ***
## total_bedrooms:households -10.575 < 2e-16 ***
## total_bedrooms:median_income 1.317 0.187688
## total_bedrooms:ocean_proximity_1H_OCEAN -0.780 0.435500
## total_bedrooms:ocean_proximity_INLAND -0.206 0.837155
## total_bedrooms:ocean_proximity_ISLAND NA NA
## total_bedrooms:ocean_proximity_NEAR_BAY -1.598 0.109988
## total_bedrooms:ocean_proximity_NEAR_OCEAN NA NA
## population:households 2.868 0.004131 **
## population:median_income -2.920 0.003509 **
## population:ocean_proximity_1H_OCEAN -5.219 1.81e-07 ***
## population:ocean_proximity_INLAND 1.915 0.055494 .
## population:ocean_proximity_ISLAND NA NA
## population:ocean_proximity_NEAR_BAY -4.467 7.96e-06 ***
## population:ocean_proximity_NEAR_OCEAN NA NA
## households:median_income 1.709 0.087484 .
## households:ocean_proximity_1H_OCEAN 2.525 0.011586 *
## households:ocean_proximity_INLAND 2.564 0.010360 *
## households:ocean_proximity_ISLAND NA NA
## households:ocean_proximity_NEAR_BAY 1.745 0.081009 .
## households:ocean_proximity_NEAR_OCEAN NA NA
## median_income:ocean_proximity_1H_OCEAN -2.199 0.027868 *
## median_income:ocean_proximity_INLAND 5.548 2.93e-08 ***
## median_income:ocean_proximity_ISLAND NA NA
## median_income:ocean_proximity_NEAR_BAY -1.010 0.312660
## median_income:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_INLAND NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_ISLAND NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_NEAR_BAY NA NA
## ocean_proximity_1H_OCEAN:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_INLAND:ocean_proximity_ISLAND NA NA
## ocean_proximity_INLAND:ocean_proximity_NEAR_BAY NA NA
## ocean_proximity_INLAND:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_ISLAND:ocean_proximity_NEAR_BAY NA NA
## ocean_proximity_ISLAND:ocean_proximity_NEAR_OCEAN NA NA
## ocean_proximity_NEAR_BAY:ocean_proximity_NEAR_OCEAN NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63010 on 20364 degrees of freedom
## Multiple R-squared: 0.7031, Adjusted R-squared: 0.7021
## F-statistic: 709.1 on 68 and 20364 DF, p-value: < 2.2e-16
summary(m2)
##
## Call:
## lm(formula = cdf$median_house_value ~ ., data = cdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -434031 -34216 -6896 26017 347654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.595e+06 1.090e+05 -14.629 < 2e-16 ***
## longitude -2.565e+04 8.681e+02 -29.549 < 2e-16 ***
## latitude -2.404e+04 8.607e+02 -27.926 < 2e-16 ***
## housing_median_age 8.739e+02 4.313e+01 20.264 < 2e-16 ***
## total_rooms 1.807e-01 9.392e-01 0.192 0.847425
## total_bedrooms -3.042e+01 9.033e+00 -3.368 0.000759 ***
## population 3.891e-03 1.454e+00 0.003 0.997865
## households 2.544e+01 9.478e+00 2.684 0.007275 **
## median_income 4.898e+04 1.075e+03 45.571 < 2e-16 ***
## ocean_proximity_1H_OCEAN -1.550e+05 3.931e+04 -3.944 8.04e-05 ***
## ocean_proximity_INLAND -1.910e+05 3.934e+04 -4.854 1.22e-06 ***
## ocean_proximity_NEAR_BAY -1.740e+05 3.934e+04 -4.423 9.78e-06 ***
## ocean_proximity_NEAR_OCEAN -1.558e+05 3.931e+04 -3.963 7.43e-05 ***
## rooms_per_household -3.102e+02 3.997e+02 -0.776 0.437738
## bedrooms_per_room 4.705e+05 4.234e+04 11.114 < 2e-16 ***
## population_per_household 8.032e+01 6.277e+01 1.280 0.200727
## log_total_rooms 3.764e+03 1.226e+04 0.307 0.758794
## log_total_bedrooms 9.310e+04 1.521e+04 6.120 9.55e-10 ***
## log_population -9.360e+04 5.238e+03 -17.870 < 2e-16 ***
## log_households 3.034e+04 8.443e+03 3.594 0.000327 ***
## log_median_income -3.517e+04 3.587e+03 -9.804 < 2e-16 ***
## log_rooms_per_household -7.860e+04 2.113e+04 -3.720 0.000200 ***
## log_bedrooms_per_room -7.995e+05 7.832e+04 -10.208 < 2e-16 ***
## log_population_per_household 8.899e+04 2.224e+04 4.002 6.30e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 55540 on 18047 degrees of freedom
## Multiple R-squared: 0.6604, Adjusted R-squared: 0.66
## F-statistic: 1526 on 23 and 18047 DF, p-value: < 2.2e-16
summary(m3)
##
## Call:
## lm(formula = cdf2$log_median_house_value ~ ., data = cdf2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.38989 -0.17200 -0.00884 0.16710 1.82186
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.647e-01 5.709e-01 1.340 0.180402
## longitude -1.501e-01 4.546e-03 -33.027 < 2e-16 ***
## latitude -1.451e-01 4.507e-03 -32.205 < 2e-16 ***
## housing_median_age 2.618e-03 2.258e-04 11.596 < 2e-16 ***
## total_rooms 1.879e-07 4.918e-06 0.038 0.969516
## total_bedrooms -7.879e-05 4.730e-05 -1.666 0.095754 .
## population 2.374e-07 7.615e-06 0.031 0.975132
## households 4.427e-05 4.963e-05 0.892 0.372432
## median_income 1.034e-01 5.628e-03 18.366 < 2e-16 ***
## ocean_proximity_1H_OCEAN -6.002e-01 2.058e-01 -2.916 0.003545 **
## ocean_proximity_INLAND -8.784e-01 2.060e-01 -4.264 2.02e-05 ***
## ocean_proximity_NEAR_BAY -7.023e-01 2.060e-01 -3.410 0.000652 ***
## ocean_proximity_NEAR_OCEAN -6.431e-01 2.058e-01 -3.124 0.001785 **
## rooms_per_household 4.496e-03 2.093e-03 2.148 0.031718 *
## bedrooms_per_room 4.086e+00 2.217e-01 18.432 < 2e-16 ***
## population_per_household 1.234e-03 3.287e-04 3.754 0.000175 ***
## log_total_rooms 2.815e-01 6.418e-02 4.387 1.16e-05 ***
## log_total_bedrooms 9.910e-02 7.966e-02 1.244 0.213467
## log_population -3.884e-01 2.742e-02 -14.163 < 2e-16 ***
## log_households 2.042e-01 4.421e-02 4.619 3.89e-06 ***
## log_median_income 3.595e-01 1.878e-02 19.143 < 2e-16 ***
## log_rooms_per_household -1.555e-01 1.106e-01 -1.406 0.159856
## log_bedrooms_per_room -4.693e+00 4.101e-01 -11.443 < 2e-16 ***
## log_population_per_household 8.890e-02 1.164e-01 0.764 0.445126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2908 on 18047 degrees of freedom
## Multiple R-squared: 0.6959, Adjusted R-squared: 0.6955
## F-statistic: 1796 on 23 and 18047 DF, p-value: < 2.2e-16
summary(m4)
##
## Call:
## lm(formula = sdf$median_house_value ~ ., data = sdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.1862 -1.0126 -0.2196 0.7430 11.5640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.533772 1.240278 4.462 8.18e-06 ***
## housing_median_age 0.090280 0.004287 21.061 < 2e-16 ***
## total_rooms 0.368137 0.139254 2.644 0.00821 **
## total_bedrooms -0.932254 0.212730 -4.382 1.18e-05 ***
## population 1.431798 0.164694 8.694 < 2e-16 ***
## households 0.377677 0.212463 1.778 0.07548 .
## median_income 1.489768 0.032026 46.517 < 2e-16 ***
## ocean_proximity_1H_OCEAN -5.332302 1.168395 -4.564 5.06e-06 ***
## ocean_proximity_INLAND -7.067375 1.168498 -6.048 1.49e-09 ***
## ocean_proximity_NEAR_BAY -5.588203 1.169057 -4.780 1.77e-06 ***
## ocean_proximity_NEAR_OCEAN -5.065813 1.168684 -4.335 1.47e-05 ***
## rooms_per_household -0.290859 0.180010 -1.616 0.10616
## bedrooms_per_room 0.591574 0.065764 8.995 < 2e-16 ***
## population_per_household -1.798775 0.339913 -5.292 1.22e-07 ***
## log_total_rooms -2.683633 0.648571 -4.138 3.52e-05 ***
## log_total_bedrooms 4.309844 0.942340 4.574 4.83e-06 ***
## log_population -9.460382 0.381341 -24.808 < 2e-16 ***
## log_households 3.993141 0.863544 4.624 3.79e-06 ***
## log_median_income -1.031279 0.105985 -9.730 < 2e-16 ***
## log_rooms_per_household 1.747278 0.707550 2.469 0.01354 *
## log_bedrooms_per_room -0.944423 0.324317 -2.912 0.00360 **
## log_population_per_household 9.368323 1.472751 6.361 2.05e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.651 on 18049 degrees of freedom
## Multiple R-squared: 0.6411, Adjusted R-squared: 0.6407
## F-statistic: 1535 on 21 and 18049 DF, p-value: < 2.2e-16
summary(m5)
## Warning in summary.lm(m5): essentially perfect fit: summary may be unreliable
##
## Call:
## lm(formula = sdf2$log_median_house_value ~ ., data = sdf2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.007e-16 -3.000e-18 1.000e-18 2.800e-18 1.412e-14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.433e-16 7.917e-17 8.126e+00 4.71e-16 ***
## housing_median_age -1.102e-18 2.736e-19 -4.029e+00 5.63e-05 ***
## total_rooms -2.319e-16 8.889e-18 -2.609e+01 < 2e-16 ***
## total_bedrooms -3.702e-16 1.358e-17 -2.726e+01 < 2e-16 ***
## population 3.174e-17 1.051e-17 3.019e+00 0.00254 **
## households 5.387e-16 1.356e-17 3.972e+01 < 2e-16 ***
## median_income 4.931e-17 2.044e-18 2.412e+01 < 2e-16 ***
## ocean_proximity_1H_OCEAN 7.517e-17 7.458e-17 1.008e+00 0.31351
## ocean_proximity_INLAND 9.439e-17 7.458e-17 1.266e+00 0.20569
## ocean_proximity_NEAR_BAY 7.193e-17 7.462e-17 9.640e-01 0.33509
## ocean_proximity_NEAR_OCEAN 7.274e-17 7.460e-17 9.750e-01 0.32954
## rooms_per_household 1.277e-15 1.149e-17 1.111e+02 < 2e-16 ***
## bedrooms_per_room -1.593e-16 4.198e-18 -3.794e+01 < 2e-16 ***
## population_per_household -3.639e-17 2.170e-17 -1.677e+00 0.09350 .
## log_total_rooms 1.901e-15 4.140e-17 4.593e+01 < 2e-16 ***
## log_total_bedrooms 2.441e-15 6.015e-17 4.059e+01 < 2e-16 ***
## log_population -7.981e-18 2.434e-17 -3.280e-01 0.74301
## log_households -4.147e-15 5.512e-17 -7.525e+01 < 2e-16 ***
## log_median_income -9.769e-17 6.765e-18 -1.444e+01 < 2e-16 ***
## log_rooms_per_household 1.000e+00 4.516e-17 2.214e+16 < 2e-16 ***
## log_bedrooms_per_room 1.764e-17 2.070e-17 8.520e-01 0.39423
## log_population_per_household 2.912e-18 9.400e-17 3.100e-02 0.97529
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.054e-16 on 18049 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.005e+33 on 21 and 18049 DF, p-value: < 2.2e-16
autoplot(m0)
autoplot(m1)
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 7 row(s) containing missing values (geom_path).
autoplot(m2)
autoplot(m3)
autoplot(m4)
autoplot(m5)
m0_adjr2 = summary(m0)$adj.r.squared
m1_adjr2 = summary(m1)$adj.r.squared
m2_adjr2 = summary(m2)$adj.r.squared
m3_adjr2 = summary(m3)$adj.r.squared
m4_adjr2 = summary(m4)$adj.r.squared
m5_adjr2 = summary(m5)$adj.r.squared
## Warning in summary.lm(m5): essentially perfect fit: summary may be unreliable
beginning_mods_results = data.frame(
"Total Predictors" =
c("m0" = extractAIC(m0)[1],
"m1" = extractAIC(m1)[1],
"m2" = extractAIC(m2)[1],
"m3" = extractAIC(m3)[1],
"m4" = extractAIC(m4)[1],
"m5" = extractAIC(m5)[1]
),
"AIC" =
c("m0" = extractAIC(m0)[2],
"m1" = extractAIC(m1)[2],
"m2" = extractAIC(m2)[2],
"m3" = extractAIC(m3)[2],
"m4" = extractAIC(m4)[2],
"m5" = extractAIC(m5)[2]
),
"Adj R-Squared" =
c("m0" = m0_adjr2,
"m1" = m1_adjr2,
"m2" = m2_adjr2,
"m3" = m3_adjr2,
"m4" = m4_adjr2,
"m5" = m5_adjr2
)
)
kable(beginning_mods_results, align = c("c", "r"))
| Total.Predictors | AIC | Adj.R.Squared | |
|---|---|---|---|
| m0 | 13 | 455132.64 | 0.6462561 |
| m1 | 69 | 451679.54 | 0.7020749 |
| m2 | 24 | 394869.82 | 0.6599973 |
| m3 | 24 | -44615.21 | 0.6955374 |
| m4 | 22 | 18146.77 | 0.6406785 |
| m5 | 22 | -1329599.98 | 1.0000000 |
library(sp)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(maptools)
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
##
## Attaching package: 'maptools'
## The following object is masked from 'package:Hmisc':
##
## label
latlong_df <- cdf[,c(1,2)]
str(latlong_df)
## Classes 'data.table' and 'data.frame': 18071 obs. of 2 variables:
## $ longitude: num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.8 37.9 37.9 ...
## - attr(*, ".internal.selfref")=<externalptr>
counties <- maps::map('county', fill=TRUE, col="transparent", plot=FALSE)
IDs <- sapply(strsplit(counties$names, ":"), function(x) x[1])
counties_sp <- map2SpatialPolygons(counties, IDs=IDs,
proj4string=CRS("+proj=longlat +datum=WGS84"))
# Convert pointsDF to a SpatialPoints object
pointsSP <- SpatialPoints(latlong_df,
proj4string=CRS("+proj=longlat +datum=WGS84"))
# Use 'over' to get _indices_ of the Polygons object containing each point
indices <- over(pointsSP, counties_sp)
# Return the county names of the Polygons object containing each point
countyNames <- sapply(counties_sp@polygons, function(x) x@ID)
my_data= countyNames[indices]
my_data=sub("california,", "", my_data)
mdf=cdf
mdf$county_name=toTitleCase(my_data)
mdf$county_name=as.factor(mdf$county_name)
mdf <- na.omit(mdf)
str(mdf)
## Classes 'data.table' and 'data.frame': 16781 obs. of 25 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.8 37.9 37.9 ...
## $ housing_median_age : num 41 21 42 40 42 48 43 40 40 21 ...
## $ total_rooms : num 880 7099 2555 751 1639 ...
## $ total_bedrooms : num 129 1106 665 184 367 ...
## $ population : num 322 2401 1206 409 929 ...
## $ households : num 126 1138 595 166 366 ...
## $ median_income : num 8.33 8.3 2.08 1.36 1.71 ...
## $ median_house_value : num 452600 358500 226700 147500 159800 ...
## $ ocean_proximity_1H_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_INLAND : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_NEAR_BAY : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ocean_proximity_NEAR_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ rooms_per_household : num 6.98 6.24 4.29 4.52 4.48 ...
## $ bedrooms_per_room : num 0.147 0.156 0.26 0.245 0.224 ...
## $ population_per_household : num 2.56 2.11 2.03 2.46 2.54 ...
## $ log_total_rooms : num 6.78 8.87 7.85 6.62 7.4 ...
## $ log_total_bedrooms : num 4.86 7.01 6.5 5.21 5.91 ...
## $ log_population : num 5.77 7.78 7.1 6.01 6.83 ...
## $ log_households : num 4.84 7.04 6.39 5.11 5.9 ...
## $ log_median_income : num 2.119 2.116 0.733 0.306 0.539 ...
## $ log_rooms_per_household : num 1.4 1.26 1.23 1.3 1.25 ...
## $ log_bedrooms_per_room : num 0.717 0.79 0.828 0.788 0.798 ...
## $ log_population_per_household: num 1.19 1.11 1.11 1.18 1.16 ...
## $ county_name : Factor w/ 60 levels "Alameda","Alpine",..: 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, ".internal.selfref")=<externalptr>
m6<- lm(mdf$median_house_value ~ . , mdf)
m7<- lm(mdf$median_house_value ~ . , mdf)
summary(m6)
##
## Call:
## lm(formula = mdf$median_house_value ~ ., data = mdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -375639 -27436 -5451 19908 341759
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.494e+06 2.329e+05 -10.710 < 2e-16 ***
## longitude -3.020e+04 1.823e+03 -16.565 < 2e-16 ***
## latitude -2.374e+04 2.758e+03 -8.606 < 2e-16 ***
## housing_median_age 8.873e+01 4.317e+01 2.055 0.039850 *
## total_rooms -3.924e-01 8.394e-01 -0.468 0.640110
## total_bedrooms -1.863e+01 8.108e+00 -2.298 0.021554 *
## population 4.922e-01 1.306e+00 0.377 0.706374
## households 1.386e+01 8.524e+00 1.626 0.104060
## median_income 4.546e+04 1.001e+03 45.404 < 2e-16 ***
## ocean_proximity_1H_OCEAN -1.357e+03 1.874e+03 -0.724 0.468904
## ocean_proximity_INLAND -7.554e+03 2.803e+03 -2.695 0.007038 **
## ocean_proximity_NEAR_BAY 5.209e+02 2.889e+03 0.180 0.856941
## ocean_proximity_NEAR_OCEAN NA NA NA NA
## rooms_per_household -2.912e+02 3.628e+02 -0.803 0.422213
## bedrooms_per_room 6.352e+05 3.844e+04 16.526 < 2e-16 ***
## population_per_household 1.263e+02 5.547e+01 2.278 0.022749 *
## log_total_rooms 1.085e+05 1.133e+04 9.578 < 2e-16 ***
## log_total_bedrooms -2.434e+04 1.410e+04 -1.727 0.084216 .
## log_population -9.570e+04 4.712e+03 -20.311 < 2e-16 ***
## log_households 4.069e+04 7.693e+03 5.289 1.24e-07 ***
## log_median_income -6.093e+04 3.296e+03 -18.489 < 2e-16 ***
## log_rooms_per_household -8.936e+04 1.899e+04 -4.705 2.56e-06 ***
## log_bedrooms_per_room -7.350e+05 7.355e+04 -9.992 < 2e-16 ***
## log_population_per_household 9.648e+04 1.970e+04 4.897 9.81e-07 ***
## county_nameAlpine 1.587e+04 2.848e+04 0.557 0.577363
## county_nameAmador 1.928e+03 1.000e+04 0.193 0.847182
## county_nameArizona,la Paz 3.135e+04 3.805e+04 0.824 0.410024
## county_nameButte -8.619e+03 7.146e+03 -1.206 0.227746
## county_nameCalaveras -9.997e+03 9.456e+03 -1.057 0.290457
## county_nameColusa -4.024e+04 1.423e+04 -2.828 0.004685 **
## county_nameContra Costa -9.207e+03 3.021e+03 -3.048 0.002309 **
## county_nameDel Norte -1.317e+04 2.181e+04 -0.604 0.545746
## county_nameEl Dorado 2.936e+04 6.620e+03 4.436 9.22e-06 ***
## county_nameFresno -2.756e+04 5.614e+03 -4.909 9.25e-07 ***
## county_nameGlenn -3.371e+04 1.110e+04 -3.036 0.002400 **
## county_nameHumboldt -6.415e+04 1.043e+04 -6.151 7.86e-10 ***
## county_nameImperial 1.095e+04 1.659e+04 0.660 0.509434
## county_nameInyo 3.831e+04 1.350e+04 2.837 0.004560 **
## county_nameKern -3.863e+04 8.594e+03 -4.495 7.01e-06 ***
## county_nameKings -3.279e+04 7.873e+03 -4.165 3.13e-05 ***
## county_nameLake -5.977e+04 6.941e+03 -8.611 < 2e-16 ***
## county_nameLassen 3.284e+04 1.364e+04 2.407 0.016098 *
## county_nameLos Angeles 6.213e+04 1.120e+04 5.549 2.92e-08 ***
## county_nameMadera -1.791e+04 7.187e+03 -2.493 0.012692 *
## county_nameMarin 7.092e+04 4.971e+03 14.268 < 2e-16 ***
## county_nameMariposa -3.904e+03 1.223e+04 -0.319 0.749623
## county_nameMendocino -4.742e+04 8.066e+03 -5.879 4.20e-09 ***
## county_nameMerced -1.808e+04 5.773e+03 -3.132 0.001741 **
## county_nameModoc 2.962e+04 2.052e+04 1.443 0.148940
## county_nameMono 4.553e+04 1.310e+04 3.477 0.000508 ***
## county_nameMonterey 3.014e+03 5.615e+03 0.537 0.591378
## county_nameNapa 4.055e+03 5.566e+03 0.729 0.466260
## county_nameNevada 4.071e+04 7.411e+03 5.493 4.00e-08 ***
## county_nameNevada,douglas 1.366e+05 4.875e+04 2.802 0.005083 **
## county_nameOrange 4.284e+04 1.213e+04 3.532 0.000413 ***
## county_namePlacer 3.207e+04 6.390e+03 5.018 5.26e-07 ***
## county_namePlumas 1.109e+04 1.104e+04 1.004 0.315486
## county_nameRiverside 1.808e+04 1.294e+04 1.397 0.162386
## county_nameSacramento -1.177e+04 4.292e+03 -2.742 0.006116 **
## county_nameSan Benito 5.224e+04 1.044e+04 5.003 5.71e-07 ***
## county_nameSan Bernardino 9.215e+03 1.197e+04 0.770 0.441557
## county_nameSan Diego 1.606e+04 1.456e+04 1.103 0.270199
## county_nameSan Francisco 6.972e+04 4.793e+03 14.547 < 2e-16 ***
## county_nameSan Joaquin -1.198e+04 3.996e+03 -2.997 0.002730 **
## county_nameSan Luis Obispo 1.969e+04 8.437e+03 2.334 0.019596 *
## county_nameSan Mateo 8.243e+04 3.943e+03 20.905 < 2e-16 ***
## county_nameSanta Barbara 7.590e+03 9.166e+03 0.828 0.407658
## county_nameSanta Clara 4.402e+04 3.276e+03 13.438 < 2e-16 ***
## county_nameSanta Cruz 3.665e+04 5.314e+03 6.897 5.51e-12 ***
## county_nameShasta -5.213e+03 9.269e+03 -0.562 0.573852
## county_nameSierra 2.755e+03 1.943e+04 0.142 0.887247
## county_nameSiskiyou -1.015e+04 1.343e+04 -0.756 0.449655
## county_nameSolano -3.203e+04 4.831e+03 -6.630 3.47e-11 ***
## county_nameSonoma 3.300e+03 4.573e+03 0.722 0.470478
## county_nameStanislaus -6.158e+02 4.399e+03 -0.140 0.888682
## county_nameSutter -1.898e+04 8.002e+03 -2.372 0.017714 *
## county_nameTehama -2.682e+04 9.723e+03 -2.759 0.005811 **
## county_nameTrinity -4.878e+04 1.245e+04 -3.918 8.96e-05 ***
## county_nameTulare -1.488e+04 7.089e+03 -2.099 0.035826 *
## county_nameTuolumne 1.539e+04 7.748e+03 1.986 0.047046 *
## county_nameVentura 3.441e+04 1.052e+04 3.271 0.001072 **
## county_nameYolo 5.240e+03 6.316e+03 0.830 0.406748
## county_nameYuba -1.914e+04 8.304e+03 -2.305 0.021183 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48390 on 16699 degrees of freedom
## Multiple R-squared: 0.7257, Adjusted R-squared: 0.7243
## F-statistic: 545.3 on 81 and 16699 DF, p-value: < 2.2e-16
summary(m7)
##
## Call:
## lm(formula = mdf$median_house_value ~ ., data = mdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -375639 -27436 -5451 19908 341759
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.494e+06 2.329e+05 -10.710 < 2e-16 ***
## longitude -3.020e+04 1.823e+03 -16.565 < 2e-16 ***
## latitude -2.374e+04 2.758e+03 -8.606 < 2e-16 ***
## housing_median_age 8.873e+01 4.317e+01 2.055 0.039850 *
## total_rooms -3.924e-01 8.394e-01 -0.468 0.640110
## total_bedrooms -1.863e+01 8.108e+00 -2.298 0.021554 *
## population 4.922e-01 1.306e+00 0.377 0.706374
## households 1.386e+01 8.524e+00 1.626 0.104060
## median_income 4.546e+04 1.001e+03 45.404 < 2e-16 ***
## ocean_proximity_1H_OCEAN -1.357e+03 1.874e+03 -0.724 0.468904
## ocean_proximity_INLAND -7.554e+03 2.803e+03 -2.695 0.007038 **
## ocean_proximity_NEAR_BAY 5.209e+02 2.889e+03 0.180 0.856941
## ocean_proximity_NEAR_OCEAN NA NA NA NA
## rooms_per_household -2.912e+02 3.628e+02 -0.803 0.422213
## bedrooms_per_room 6.352e+05 3.844e+04 16.526 < 2e-16 ***
## population_per_household 1.263e+02 5.547e+01 2.278 0.022749 *
## log_total_rooms 1.085e+05 1.133e+04 9.578 < 2e-16 ***
## log_total_bedrooms -2.434e+04 1.410e+04 -1.727 0.084216 .
## log_population -9.570e+04 4.712e+03 -20.311 < 2e-16 ***
## log_households 4.069e+04 7.693e+03 5.289 1.24e-07 ***
## log_median_income -6.093e+04 3.296e+03 -18.489 < 2e-16 ***
## log_rooms_per_household -8.936e+04 1.899e+04 -4.705 2.56e-06 ***
## log_bedrooms_per_room -7.350e+05 7.355e+04 -9.992 < 2e-16 ***
## log_population_per_household 9.648e+04 1.970e+04 4.897 9.81e-07 ***
## county_nameAlpine 1.587e+04 2.848e+04 0.557 0.577363
## county_nameAmador 1.928e+03 1.000e+04 0.193 0.847182
## county_nameArizona,la Paz 3.135e+04 3.805e+04 0.824 0.410024
## county_nameButte -8.619e+03 7.146e+03 -1.206 0.227746
## county_nameCalaveras -9.997e+03 9.456e+03 -1.057 0.290457
## county_nameColusa -4.024e+04 1.423e+04 -2.828 0.004685 **
## county_nameContra Costa -9.207e+03 3.021e+03 -3.048 0.002309 **
## county_nameDel Norte -1.317e+04 2.181e+04 -0.604 0.545746
## county_nameEl Dorado 2.936e+04 6.620e+03 4.436 9.22e-06 ***
## county_nameFresno -2.756e+04 5.614e+03 -4.909 9.25e-07 ***
## county_nameGlenn -3.371e+04 1.110e+04 -3.036 0.002400 **
## county_nameHumboldt -6.415e+04 1.043e+04 -6.151 7.86e-10 ***
## county_nameImperial 1.095e+04 1.659e+04 0.660 0.509434
## county_nameInyo 3.831e+04 1.350e+04 2.837 0.004560 **
## county_nameKern -3.863e+04 8.594e+03 -4.495 7.01e-06 ***
## county_nameKings -3.279e+04 7.873e+03 -4.165 3.13e-05 ***
## county_nameLake -5.977e+04 6.941e+03 -8.611 < 2e-16 ***
## county_nameLassen 3.284e+04 1.364e+04 2.407 0.016098 *
## county_nameLos Angeles 6.213e+04 1.120e+04 5.549 2.92e-08 ***
## county_nameMadera -1.791e+04 7.187e+03 -2.493 0.012692 *
## county_nameMarin 7.092e+04 4.971e+03 14.268 < 2e-16 ***
## county_nameMariposa -3.904e+03 1.223e+04 -0.319 0.749623
## county_nameMendocino -4.742e+04 8.066e+03 -5.879 4.20e-09 ***
## county_nameMerced -1.808e+04 5.773e+03 -3.132 0.001741 **
## county_nameModoc 2.962e+04 2.052e+04 1.443 0.148940
## county_nameMono 4.553e+04 1.310e+04 3.477 0.000508 ***
## county_nameMonterey 3.014e+03 5.615e+03 0.537 0.591378
## county_nameNapa 4.055e+03 5.566e+03 0.729 0.466260
## county_nameNevada 4.071e+04 7.411e+03 5.493 4.00e-08 ***
## county_nameNevada,douglas 1.366e+05 4.875e+04 2.802 0.005083 **
## county_nameOrange 4.284e+04 1.213e+04 3.532 0.000413 ***
## county_namePlacer 3.207e+04 6.390e+03 5.018 5.26e-07 ***
## county_namePlumas 1.109e+04 1.104e+04 1.004 0.315486
## county_nameRiverside 1.808e+04 1.294e+04 1.397 0.162386
## county_nameSacramento -1.177e+04 4.292e+03 -2.742 0.006116 **
## county_nameSan Benito 5.224e+04 1.044e+04 5.003 5.71e-07 ***
## county_nameSan Bernardino 9.215e+03 1.197e+04 0.770 0.441557
## county_nameSan Diego 1.606e+04 1.456e+04 1.103 0.270199
## county_nameSan Francisco 6.972e+04 4.793e+03 14.547 < 2e-16 ***
## county_nameSan Joaquin -1.198e+04 3.996e+03 -2.997 0.002730 **
## county_nameSan Luis Obispo 1.969e+04 8.437e+03 2.334 0.019596 *
## county_nameSan Mateo 8.243e+04 3.943e+03 20.905 < 2e-16 ***
## county_nameSanta Barbara 7.590e+03 9.166e+03 0.828 0.407658
## county_nameSanta Clara 4.402e+04 3.276e+03 13.438 < 2e-16 ***
## county_nameSanta Cruz 3.665e+04 5.314e+03 6.897 5.51e-12 ***
## county_nameShasta -5.213e+03 9.269e+03 -0.562 0.573852
## county_nameSierra 2.755e+03 1.943e+04 0.142 0.887247
## county_nameSiskiyou -1.015e+04 1.343e+04 -0.756 0.449655
## county_nameSolano -3.203e+04 4.831e+03 -6.630 3.47e-11 ***
## county_nameSonoma 3.300e+03 4.573e+03 0.722 0.470478
## county_nameStanislaus -6.158e+02 4.399e+03 -0.140 0.888682
## county_nameSutter -1.898e+04 8.002e+03 -2.372 0.017714 *
## county_nameTehama -2.682e+04 9.723e+03 -2.759 0.005811 **
## county_nameTrinity -4.878e+04 1.245e+04 -3.918 8.96e-05 ***
## county_nameTulare -1.488e+04 7.089e+03 -2.099 0.035826 *
## county_nameTuolumne 1.539e+04 7.748e+03 1.986 0.047046 *
## county_nameVentura 3.441e+04 1.052e+04 3.271 0.001072 **
## county_nameYolo 5.240e+03 6.316e+03 0.830 0.406748
## county_nameYuba -1.914e+04 8.304e+03 -2.305 0.021183 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48390 on 16699 degrees of freedom
## Multiple R-squared: 0.7257, Adjusted R-squared: 0.7243
## F-statistic: 545.3 on 81 and 16699 DF, p-value: < 2.2e-16
autoplot(m6)
autoplot(m7)
m6_adjr2 = summary(m6)$adj.r.squared
m7_adjr2 = summary(m7)$adj.r.squared
beginning_mods_results = data.frame(
"Total Predictors" =
c("m6" = extractAIC(m6)[1],
"m7" = extractAIC(m7)[1]
),
"AIC" =
c("m6" = extractAIC(m6)[2],
"m7" = extractAIC(m7)[2]
),
"Adj R-Squared" =
c("m6" = m6_adjr2,
"m7" = m7_adjr2
)
)
kable(beginning_mods_results, align = c("c", "r"))
| Total.Predictors | AIC | Adj.R.Squared | |
|---|---|---|---|
| m6 | 82 | 362116.5 | 0.7243287 |
| m7 | 82 | 362116.5 | 0.7243287 |
library(dplyr)
str(cdf)
## Classes 'data.table' and 'data.frame': 18071 obs. of 24 variables:
## $ longitude : num -122 -122 -122 -122 -122 ...
## $ latitude : num 37.9 37.9 37.8 37.9 37.9 ...
## $ housing_median_age : num 41 21 42 40 42 41 48 48 43 40 ...
## $ total_rooms : num 880 7099 2555 751 1639 ...
## $ total_bedrooms : num 129 1106 665 184 367 ...
## $ population : num 322 2401 1206 409 929 ...
## $ households : num 126 1138 595 166 366 ...
## $ median_income : num 8.33 8.3 2.08 1.36 1.71 ...
## $ median_house_value : num 452600 358500 226700 147500 159800 ...
## $ ocean_proximity_1H_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_INLAND : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ocean_proximity_NEAR_BAY : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ocean_proximity_NEAR_OCEAN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ rooms_per_household : num 6.98 6.24 4.29 4.52 4.48 ...
## $ bedrooms_per_room : num 0.147 0.156 0.26 0.245 0.224 ...
## $ population_per_household : num 2.56 2.11 2.03 2.46 2.54 ...
## $ log_total_rooms : num 6.78 8.87 7.85 6.62 7.4 ...
## $ log_total_bedrooms : num 4.86 7.01 6.5 5.21 5.91 ...
## $ log_population : num 5.77 7.78 7.1 6.01 6.83 ...
## $ log_households : num 4.84 7.04 6.39 5.11 5.9 ...
## $ log_median_income : num 2.119 2.116 0.733 0.306 0.539 ...
## $ log_rooms_per_household : num 1.4 1.26 1.23 1.3 1.25 ...
## $ log_bedrooms_per_room : num 0.717 0.79 0.828 0.788 0.798 ...
## $ log_population_per_household: num 1.19 1.11 1.11 1.18 1.16 ...
## - attr(*, ".internal.selfref")=<externalptr>
cor(cdf)
## longitude latitude housing_median_age
## longitude 1.0000000000 -0.922364020 -0.02308773
## latitude -0.9223640199 1.000000000 -0.06172033
## housing_median_age -0.0230877337 -0.061720327 1.00000000
## total_rooms 0.0336844030 -0.026127082 -0.37476300
## total_bedrooms 0.0630945239 -0.063519194 -0.33130972
## population 0.0888992366 -0.105798593 -0.27718377
## households 0.0493879355 -0.069338040 -0.31045185
## median_income -0.0186060804 -0.074272054 -0.19793210
## median_house_value -0.0232645793 -0.172011621 0.01381800
## ocean_proximity_1H_OCEAN 0.2854194785 -0.428637043 0.11762475
## ocean_proximity_INLAND -0.0922112705 0.386166117 -0.21969804
## ocean_proximity_NEAR_BAY -0.4117854923 0.307068376 0.12337611
## ocean_proximity_NEAR_OCEAN 0.0421879553 -0.162919971 0.03724211
## rooms_per_household -0.0384494147 0.130983264 -0.17876200
## bedrooms_per_room 0.1157156990 -0.143056445 0.14426166
## population_per_household 0.0004556728 0.002566498 0.02155349
## log_total_rooms 0.0209708958 -0.024666645 -0.32481705
## log_total_bedrooms 0.0576674469 -0.068843342 -0.27763584
## log_population 0.1030874510 -0.141221893 -0.21160782
## log_households 0.0532614785 -0.091996515 -0.24197414
## log_median_income -0.0198907447 -0.075912691 -0.18403576
## log_rooms_per_household -0.0700990514 0.151488923 -0.05157560
## log_bedrooms_per_room 0.1105947173 -0.134860118 -0.02612265
## log_population_per_household 0.0623846343 -0.046976566 0.10572349
## total_rooms total_bedrooms population
## longitude 0.0336844030 0.063094524 0.08889924
## latitude -0.0261270816 -0.063519194 -0.10579859
## housing_median_age -0.3747630026 -0.331309723 -0.27718377
## total_rooms 1.0000000000 0.935422796 0.86023006
## total_bedrooms 0.9354227956 1.000000000 0.88021200
## population 0.8602300574 0.880212004 1.00000000
## households 0.9224226021 0.978746198 0.91068245
## median_income 0.2227087874 0.020802496 0.04015941
## median_house_value 0.1532913001 0.079970469 0.02227064
## ocean_proximity_1H_OCEAN -0.0162807691 0.012381030 0.06951887
## ocean_proximity_INLAND 0.0263535919 -0.013435002 -0.03799835
## ocean_proximity_NEAR_BAY 0.0003676967 -0.003761993 -0.03433719
## ocean_proximity_NEAR_OCEAN -0.0134742604 0.003929836 -0.02140617
## rooms_per_household 0.1397245138 0.013422080 -0.07445637
## bedrooms_per_room -0.1875404824 0.076759152 0.03409933
## population_per_household -0.0241525895 -0.027629035 0.06532818
## log_total_rooms 0.7940511233 0.771069090 0.69415683
## log_total_bedrooms 0.7506468980 0.817471436 0.72221616
## log_population 0.6868776974 0.725579093 0.79772220
## log_households 0.7314149724 0.789293297 0.73601806
## log_median_income 0.2322828171 0.042326951 0.05389188
## log_rooms_per_household -0.1963873711 -0.327411813 -0.35015483
## log_bedrooms_per_room 0.2734459719 0.491888221 0.41129794
## log_population_per_household -0.2756666873 -0.317412823 -0.09722667
## households median_income median_house_value
## longitude 0.049387935 -0.018606080 -0.02326458
## latitude -0.069338040 -0.074272054 -0.17201162
## housing_median_age -0.310451853 -0.197932095 0.01381800
## total_rooms 0.922422602 0.222708787 0.15329130
## total_bedrooms 0.978746198 0.020802496 0.07997047
## population 0.910682455 0.040159411 0.02227064
## households 1.000000000 0.045468585 0.09948996
## median_income 0.045468585 1.000000000 0.66577485
## median_house_value 0.099489960 0.665774849 1.00000000
## ocean_proximity_1H_OCEAN 0.038132019 0.184800134 0.32103851
## ocean_proximity_INLAND -0.047228557 -0.228050323 -0.50301677
## ocean_proximity_NEAR_BAY 0.005665539 0.067867661 0.11481952
## ocean_proximity_NEAR_OCEAN 0.006090457 -0.005749940 0.14500129
## rooms_per_household -0.078929107 0.311129787 0.11363592
## bedrooms_per_room 0.058160385 -0.634558982 -0.23391359
## population_per_household -0.026534027 0.025866833 -0.01991006
## log_total_rooms 0.769172585 0.234052911 0.17626147
## log_total_bedrooms 0.808594866 0.009043832 0.08548597
## log_population 0.759431749 0.037795960 0.02962732
## log_households 0.815401150 0.042968702 0.11003767
## log_median_income 0.067472361 0.958447829 0.62987265
## log_rooms_per_household -0.384828841 0.256478728 0.05214824
## log_bedrooms_per_room 0.474746017 -0.478212118 -0.15192829
## log_population_per_household -0.313887439 -0.016494389 -0.15963422
## ocean_proximity_1H_OCEAN ocean_proximity_INLAND
## longitude 0.285419479 -0.09221127
## latitude -0.428637043 0.38616612
## housing_median_age 0.117624750 -0.21969804
## total_rooms -0.016280769 0.02635359
## total_bedrooms 0.012381030 -0.01343500
## population 0.069518865 -0.03799835
## households 0.038132019 -0.04722856
## median_income 0.184800134 -0.22805032
## median_house_value 0.321038511 -0.50301677
## ocean_proximity_1H_OCEAN 1.000000000 -0.65541248
## ocean_proximity_INLAND -0.655412478 1.00000000
## ocean_proximity_NEAR_BAY -0.267518416 -0.21501585
## ocean_proximity_NEAR_OCEAN -0.341321517 -0.27433452
## rooms_per_household -0.126176671 0.18085036
## bedrooms_per_room 0.104289188 -0.14222788
## population_per_household -0.002092085 0.01133422
## log_total_rooms 0.009729217 -0.01091605
## log_total_bedrooms 0.038781199 -0.05283844
## log_population 0.116629610 -0.09485921
## log_households 0.073411382 -0.09864180
## log_median_income 0.185238287 -0.23232746
## log_rooms_per_household -0.144206718 0.19766611
## log_bedrooms_per_room 0.094314045 -0.12904802
## log_population_per_household 0.033718204 0.04757230
## ocean_proximity_NEAR_BAY
## longitude -0.4117854923
## latitude 0.3070683763
## housing_median_age 0.1233761099
## total_rooms 0.0003676967
## total_bedrooms -0.0037619932
## population -0.0343371915
## households 0.0056655389
## median_income 0.0678676612
## median_house_value 0.1148195192
## ocean_proximity_1H_OCEAN -0.2675184160
## ocean_proximity_INLAND -0.2150158524
## ocean_proximity_NEAR_BAY 1.0000000000
## ocean_proximity_NEAR_OCEAN -0.1119745800
## rooms_per_household -0.0201352195
## bedrooms_per_room -0.0210359696
## population_per_household -0.0120023879
## log_total_rooms -0.0007979923
## log_total_bedrooms -0.0080782466
## log_population -0.0378127369
## log_households 0.0036637247
## log_median_income 0.0633516305
## log_rooms_per_household -0.0113660622
## log_bedrooms_per_room -0.0232309601
## log_population_per_household -0.0682139322
## ocean_proximity_NEAR_OCEAN rooms_per_household
## longitude 0.042187955 -0.038449415
## latitude -0.162919971 0.130983264
## housing_median_age 0.037242109 -0.178761998
## total_rooms -0.013474260 0.139724514
## total_bedrooms 0.003929836 0.013422080
## population -0.021406165 -0.074456368
## households 0.006090457 -0.078929107
## median_income -0.005749940 0.311129787
## median_house_value 0.145001289 0.113635921
## ocean_proximity_1H_OCEAN -0.341321517 -0.126176671
## ocean_proximity_INLAND -0.274334523 0.180850360
## ocean_proximity_NEAR_BAY -0.111974580 -0.020135220
## ocean_proximity_NEAR_OCEAN 1.000000000 -0.053486583
## rooms_per_household -0.053486583 1.000000000
## bedrooms_per_room 0.064356659 -0.415270347
## population_per_household -0.003237739 -0.007216777
## log_total_rooms 0.002008713 0.139160080
## log_total_bedrooms 0.024397651 0.001579552
## log_population -0.007586525 -0.177676942
## log_households 0.028601981 -0.162566947
## log_median_income 0.003446060 0.295219340
## log_rooms_per_household -0.057864508 0.659298701
## log_bedrooms_per_room 0.062433286 -0.298376578
## log_population_per_household -0.062813422 0.038223326
## bedrooms_per_room population_per_household
## longitude 0.1157156990 0.0004556728
## latitude -0.1430564446 0.0025664977
## housing_median_age 0.1442616560 0.0215534933
## total_rooms -0.1875404824 -0.0241525895
## total_bedrooms 0.0767591524 -0.0276290346
## population 0.0340993331 0.0653281812
## households 0.0581603852 -0.0265340274
## median_income -0.6345589816 0.0258668333
## median_house_value -0.2339135880 -0.0199100594
## ocean_proximity_1H_OCEAN 0.1042891883 -0.0020920846
## ocean_proximity_INLAND -0.1422278798 0.0113342227
## ocean_proximity_NEAR_BAY -0.0210359696 -0.0120023879
## ocean_proximity_NEAR_OCEAN 0.0643566593 -0.0032377391
## rooms_per_household -0.4152703470 -0.0072167769
## bedrooms_per_room 1.0000000000 0.0043603587
## population_per_household 0.0043603587 1.0000000000
## log_total_rooms -0.2477146086 -0.0835735056
## log_total_bedrooms 0.0617609405 -0.0846318913
## log_population 0.0172628437 0.0417694650
## log_households 0.0321968961 -0.0810461733
## log_median_income -0.6489189054 0.0145874010
## log_rooms_per_household -0.4207453003 0.0706099308
## log_bedrooms_per_room 0.7211657237 -0.0853377761
## log_population_per_household -0.0005781063 0.5528646835
## log_total_rooms log_total_bedrooms log_population
## longitude 0.0209708958 0.057667447 0.103087451
## latitude -0.0246666450 -0.068843342 -0.141221893
## housing_median_age -0.3248170511 -0.277635842 -0.211607821
## total_rooms 0.7940511233 0.750646898 0.686877697
## total_bedrooms 0.7710690903 0.817471436 0.725579093
## population 0.6941568286 0.722216158 0.797722200
## households 0.7691725851 0.808594866 0.759431749
## median_income 0.2340529114 0.009043832 0.037795960
## median_house_value 0.1762614681 0.085485972 0.029627317
## ocean_proximity_1H_OCEAN 0.0097292173 0.038781199 0.116629610
## ocean_proximity_INLAND -0.0109160491 -0.052838437 -0.094859210
## ocean_proximity_NEAR_BAY -0.0007979923 -0.008078247 -0.037812737
## ocean_proximity_NEAR_OCEAN 0.0020087127 0.024397651 -0.007586525
## rooms_per_household 0.1391600797 0.001579552 -0.177676942
## bedrooms_per_room -0.2477146086 0.061760940 0.017262844
## population_per_household -0.0835735056 -0.084631891 0.041769465
## log_total_rooms 1.0000000000 0.949123739 0.863753047
## log_total_bedrooms 0.9491237386 1.000000000 0.895265226
## log_population 0.8637530465 0.895265226 1.000000000
## log_households 0.9326880798 0.972298862 0.933558108
## log_median_income 0.2671505080 0.045589068 0.069285155
## log_rooms_per_household -0.3705381193 -0.520693264 -0.601721079
## log_bedrooms_per_room 0.4350723248 0.686033805 0.586311406
## log_population_per_household -0.4906483419 -0.511370349 -0.230411960
## log_households log_median_income
## longitude 0.053261479 -0.01989074
## latitude -0.091996515 -0.07591269
## housing_median_age -0.241974142 -0.18403576
## total_rooms 0.731414972 0.23228282
## total_bedrooms 0.789293297 0.04232695
## population 0.736018062 0.05389188
## households 0.815401150 0.06747236
## median_income 0.042968702 0.95844783
## median_house_value 0.110037671 0.62987265
## ocean_proximity_1H_OCEAN 0.073411382 0.18523829
## ocean_proximity_INLAND -0.098641795 -0.23232746
## ocean_proximity_NEAR_BAY 0.003663725 0.06335163
## ocean_proximity_NEAR_OCEAN 0.028601981 0.00344606
## rooms_per_household -0.162566947 0.29521934
## bedrooms_per_room 0.032196896 -0.64891891
## population_per_household -0.081046173 0.01458740
## log_total_rooms 0.932688080 0.26715051
## log_total_bedrooms 0.972298862 0.04558907
## log_population 0.933558108 0.06928515
## log_households 1.000000000 0.08102491
## log_median_income 0.081024906 1.00000000
## log_rooms_per_household -0.630269747 0.22346051
## log_bedrooms_per_room 0.645308828 -0.45100734
## log_population_per_household -0.507470925 -0.04566184
## log_rooms_per_household log_bedrooms_per_room
## longitude -0.07009905 0.11059472
## latitude 0.15148892 -0.13486012
## housing_median_age -0.05157560 -0.02612265
## total_rooms -0.19638737 0.27344597
## total_bedrooms -0.32741181 0.49188822
## population -0.35015483 0.41129794
## households -0.38482884 0.47474602
## median_income 0.25647873 -0.47821212
## median_house_value 0.05214824 -0.15192829
## ocean_proximity_1H_OCEAN -0.14420672 0.09431404
## ocean_proximity_INLAND 0.19766611 -0.12904802
## ocean_proximity_NEAR_BAY -0.01136606 -0.02323096
## ocean_proximity_NEAR_OCEAN -0.05786451 0.06243329
## rooms_per_household 0.65929870 -0.29837658
## bedrooms_per_room -0.42074530 0.72116572
## population_per_household 0.07060993 -0.08533778
## log_total_rooms -0.37053812 0.43507232
## log_total_bedrooms -0.52069326 0.68603380
## log_population -0.60172108 0.58631141
## log_households -0.63026975 0.64530883
## log_median_income 0.22346051 -0.45100734
## log_rooms_per_household 1.00000000 -0.69434825
## log_bedrooms_per_room -0.69434825 1.00000000
## log_population_per_household 0.46168538 -0.39208473
## log_population_per_household
## longitude 0.0623846343
## latitude -0.0469765656
## housing_median_age 0.1057234910
## total_rooms -0.2756666873
## total_bedrooms -0.3174128230
## population -0.0972266706
## households -0.3138874388
## median_income -0.0164943895
## median_house_value -0.1596342228
## ocean_proximity_1H_OCEAN 0.0337182043
## ocean_proximity_INLAND 0.0475723024
## ocean_proximity_NEAR_BAY -0.0682139322
## ocean_proximity_NEAR_OCEAN -0.0628134216
## rooms_per_household 0.0382233257
## bedrooms_per_room -0.0005781063
## population_per_household 0.5528646835
## log_total_rooms -0.4906483419
## log_total_bedrooms -0.5113703493
## log_population -0.2304119597
## log_households -0.5074709249
## log_median_income -0.0456618362
## log_rooms_per_household 0.4616853762
## log_bedrooms_per_room -0.3920847272
## log_population_per_household 1.0000000000
x.train <- sample_frac(tbl = cdf, replace = FALSE, size = 0.80)
x.test <- anti_join(cdf, x.train)
## Joining, by = c("longitude", "latitude", "housing_median_age", "total_rooms",
## "total_bedrooms", "population", "households", "median_income",
## "median_house_value", "ocean_proximity_1H_OCEAN", "ocean_proximity_INLAND",
## "ocean_proximity_NEAR_BAY", "ocean_proximity_NEAR_OCEAN",
## "rooms_per_household", "bedrooms_per_room", "population_per_household",
## "log_total_rooms", "log_total_bedrooms", "log_population", "log_households",
## "log_median_income", "log_rooms_per_household", "log_bedrooms_per_room",
## "log_population_per_household")
summary(x.train)
## longitude latitude housing_median_age total_rooms
## Min. :-124.3 Min. :32.54 Min. : 1.00 Min. : 2
## 1st Qu.:-121.5 1st Qu.:33.92 1st Qu.:17.00 1st Qu.: 1457
## Median :-118.5 Median :34.24 Median :27.00 Median : 2143
## Mean :-119.5 Mean :35.60 Mean :26.64 Mean : 2672
## 3rd Qu.:-118.0 3rd Qu.:37.67 3rd Qu.:35.00 3rd Qu.: 3183
## Max. :-114.3 Max. :41.95 Max. :48.00 Max. :39320
## total_bedrooms population households median_income
## Min. : 2.0 Min. : 3 Min. : 2.0 Min. : 0.4999
## 1st Qu.: 300.0 1st Qu.: 815 1st Qu.: 284.0 1st Qu.: 2.5389
## Median : 442.0 Median : 1208 Median : 415.0 Median : 3.4712
## Mean : 549.1 Mean : 1473 Mean : 509.4 Mean : 3.6923
## 3rd Qu.: 659.0 3rd Qu.: 1781 3rd Qu.: 614.0 3rd Qu.: 4.6118
## Max. :6210.0 Max. :35682 Max. :5358.0 Max. :13.1477
## median_house_value ocean_proximity_1H_OCEAN ocean_proximity_INLAND
## Min. : 14999 Min. :0.0000 Min. :0.0000
## 1st Qu.:115000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :171100 Median :0.0000 Median :0.0000
## Mean :188957 Mean :0.4459 Mean :0.3474
## 3rd Qu.:242100 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :499100 Max. :1.0000 Max. :1.0000
## ocean_proximity_NEAR_BAY ocean_proximity_NEAR_OCEAN rooms_per_household
## Min. :0.00000 Min. :0.0000 Min. : 0.8461
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 4.4410
## Median :0.00000 Median :0.0000 Median : 5.2209
## Mean :0.08024 Mean :0.1263 Mean : 5.3947
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.: 5.9895
## Max. :1.00000 Max. :1.0000 Max. :132.5333
## bedrooms_per_room population_per_household log_total_rooms
## Min. :0.1128 Min. : 0.750 Min. : 0.6931
## 1st Qu.:0.1771 1st Qu.: 2.472 1st Qu.: 7.2841
## Median :0.2036 Median : 2.857 Median : 7.6700
## Mean :0.2139 Mean : 3.152 Mean : 7.6446
## 3rd Qu.:0.2401 3rd Qu.: 3.326 3rd Qu.: 8.0656
## Max. :1.0000 Max. :1243.333 Max. :10.5795
## log_total_bedrooms log_population log_households log_median_income
## Min. :0.6931 Min. : 1.099 Min. :0.6931 Min. :-0.6933
## 1st Qu.:5.7038 1st Qu.: 6.703 1st Qu.:5.6490 1st Qu.: 0.9317
## Median :6.0913 Median : 7.097 Median :6.0283 Median : 1.2445
## Mean :6.0736 Mean : 7.061 Mean :6.0017 Mean : 1.2155
## 3rd Qu.:6.4907 3rd Qu.: 7.485 3rd Qu.:6.4200 3rd Qu.: 1.5286
## Max. :8.7339 Max. :10.482 Max. :8.5863 Max. : 2.5762
## log_rooms_per_household log_bedrooms_per_room log_population_per_household
## Min. :0.9349 Min. :0.3372 Min. :0.7925
## 1st Qu.:1.2410 1st Qu.:0.7727 1st Qu.:1.1478
## Median :1.2734 Median :0.7933 Median :1.1749
## Mean :1.2803 Mean :0.7930 Mean :1.1815
## 3rd Qu.:1.3058 3rd Qu.:0.8144 3rd Qu.:1.2057
## Max. :5.6147 Max. :1.0000 Max. :4.9768
model <- lm(x.train$median_house_value ~ x.train$latitude + x.train$median_income + x.train$ocean_proximity_1H_OCEAN + x.train$ocean_proximity_INLAND + x.train$bedrooms_per_room + x.train$log_population_per_household ,x.train)
summary(model)
##
## Call:
## lm(formula = x.train$median_house_value ~ x.train$latitude +
## x.train$median_income + x.train$ocean_proximity_1H_OCEAN +
## x.train$ocean_proximity_INLAND + x.train$bedrooms_per_room +
## x.train$log_population_per_household, data = x.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -373267 -36922 -9976 25038 538284
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 155174.7 13036.8 11.903 < 2e-16 ***
## x.train$latitude 516.1 262.2 1.968 0.0491 *
## x.train$median_income 42512.3 455.5 93.332 < 2e-16 ***
## x.train$ocean_proximity_1H_OCEAN -8525.7 1382.1 -6.169 7.08e-10 ***
## x.train$ocean_proximity_INLAND -70061.2 1470.9 -47.631 < 2e-16 ***
## x.train$bedrooms_per_room 295385.9 12626.8 23.394 < 2e-16 ***
## x.train$log_population_per_household -149470.2 6087.8 -24.552 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60090 on 14450 degrees of freedom
## Multiple R-squared: 0.602, Adjusted R-squared: 0.6018
## F-statistic: 3642 on 6 and 14450 DF, p-value: < 2.2e-16
test_actual = x.test$median_house_value
# the predicted house values for the test set
test_predictions = predict(model, x.test)
## Warning: 'newdata' had 3614 rows but variables found have 14457 rows
# the RMSE
test_rmse = sqrt(mean((test_actual - test_predictions)^2))
## Warning in test_actual - test_predictions: longer object length is not a
## multiple of shorter object length
# the percentage error
test_rmse
## [1] 120726.2
x.train <- sample_frac(tbl = df, replace = FALSE, size = 0.80)
x.test <- anti_join(df, x.train)
## Joining, by = c("longitude", "latitude", "housing_median_age", "total_rooms",
## "total_bedrooms", "population", "households", "median_income",
## "median_house_value", "ocean_proximity_1H_OCEAN", "ocean_proximity_INLAND",
## "ocean_proximity_ISLAND", "ocean_proximity_NEAR_BAY",
## "ocean_proximity_NEAR_OCEAN")
model <- lm(x.train$median_house_value ~ .,x.train)
summary(model)
##
## Call:
## lm(formula = x.train$median_house_value ~ ., data = x.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -557344 -42673 -10637 28533 761915
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.234e+06 9.821e+04 -22.747 < 2e-16 ***
## longitude -2.639e+04 1.133e+03 -23.290 < 2e-16 ***
## latitude -2.498e+04 1.116e+03 -22.380 < 2e-16 ***
## housing_median_age 1.081e+03 4.883e+01 22.144 < 2e-16 ***
## total_rooms -5.951e+00 8.879e-01 -6.702 2.12e-11 ***
## total_bedrooms 1.006e+02 7.620e+00 13.205 < 2e-16 ***
## population -3.719e+01 1.182e+00 -31.472 < 2e-16 ***
## households 4.606e+01 8.236e+00 5.593 2.27e-08 ***
## median_income 3.926e+04 3.776e+02 103.993 < 2e-16 ***
## ocean_proximity_1H_OCEAN -3.472e+03 1.738e+03 -1.998 0.045742 *
## ocean_proximity_INLAND -4.388e+04 2.496e+03 -17.577 < 2e-16 ***
## ocean_proximity_ISLAND 1.223e+05 3.959e+04 3.089 0.002009 **
## ocean_proximity_NEAR_BAY -8.682e+03 2.425e+03 -3.579 0.000345 ***
## ocean_proximity_NEAR_OCEAN NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 68490 on 16333 degrees of freedom
## Multiple R-squared: 0.647, Adjusted R-squared: 0.6467
## F-statistic: 2494 on 12 and 16333 DF, p-value: < 2.2e-16
test_actual = x.test$median_house_value
# the predicted house values for the test set
test_predictions = predict(m1, x.test)
## Warning in predict.lm(m1, x.test): prediction from a rank-deficient fit may be
## misleading
# the RMSE
test_rmse = sqrt(mean((test_actual - test_predictions)^2))
# the percentage error
test_rmse
## [1] 64675.25