X <- c(17, 21, 35, 39, 50, 65)
Y <- c(132, 150, 160, 162, 149, 170)
Y_avg <- mean(Y)
X_avg <- mean(X)
B1 <- (sum(X*Y) - 6 * X_avg * Y_avg) / (sum(X^2) - 6 * X_avg ^ 2)
B0 <- Y_avg - B1 * X_avg
B0
## [1] 132.913
B1
## [1] 0.5529606
data(anscombe)
View(anscombe)
# method 1
plot(anscombe$x1, anscombe$y1)
# method 2
plot(y1 ~ x1, data = anscombe )
fit <- lm(y1 ~ x1, data = anscombe)
fit
##
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
##
## Coefficients:
## (Intercept) x1
## 3.0001 0.5001
summary(fit)
##
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.92127 -0.45577 -0.04136 0.70941 1.83882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0001 1.1247 2.667 0.02573 *
## x1 0.5001 0.1179 4.241 0.00217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6665, Adjusted R-squared: 0.6295
## F-statistic: 17.99 on 1 and 9 DF, p-value: 0.00217
plot(y1 ~ x1, data = anscombe )
abline(fit, col="red")
predict(fit, data.frame(x1 = 14))
## 1
## 10.00136
predict(fit, data.frame(x1 = 16))
## 1
## 11.00155
fit <- lm(y2 ~ x1, data = anscombe)
plot(y2 ~ x1, data = anscombe)
abline(fit, col= "red")
summary(fit)
##
## Call:
## lm(formula = y2 ~ x1, data = anscombe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9009 -0.7609 0.1291 0.9491 1.2691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.001 1.125 2.667 0.02576 *
## x1 0.500 0.118 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.237 on 9 degrees of freedom
## Multiple R-squared: 0.6662, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002179
fit2 <- lm(y2 ~ poly(x1, 2), data = anscombe)
summary(fit2)
##
## Call:
## lm(formula = y2 ~ poly(x1, 2), data = anscombe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0013287 -0.0011888 -0.0006294 0.0008741 0.0023776
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.5009091 0.0005043 14875 <2e-16 ***
## poly(x1, 2)1 5.2440442 0.0016725 3135 <2e-16 ***
## poly(x1, 2)2 -3.7116396 0.0016725 -2219 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001672 on 8 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 7.378e+06 on 2 and 8 DF, p-value: < 2.2e-16
plot(y2 ~ x1, data = anscombe)
lines(sort(anscombe$x1), fit2$fitted.values[order(anscombe$x1)], col="red")
## RLM
fit3 <- lm(y3 ~ x1, data = anscombe)
plot(y3 ~ x1, data = anscombe)
fit3 <- lm(y3 ~ x1, data = anscombe)
abline(fit3, col = "red")
#install.packages("MASS")
library(MASS)
## Warning: package 'MASS' was built under R version 3.4.3
fit4 <- rlm(y3 ~ x1, data = anscombe)
plot(y3 ~ x1, data = anscombe)
abline(fit4, col = 'red')
house_prices <- read.csv('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/house-prices.csv')
head(house_prices)
## Home Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 1 114300 1790 2 2 2 No East
## 2 2 114200 2030 4 2 3 No East
## 3 3 114800 1740 3 2 1 No East
## 4 4 94700 1980 3 2 3 No East
## 5 5 119800 2130 3 3 3 No East
## 6 6 114600 1780 3 2 2 No North
plot(Price ~ SqFt, data = house_prices)
fit <- lm(Price ~ SqFt, data = house_prices)
abline(fit, col= "red")
## 591 租屋價格搜集
library(httr)
dfall <- data.frame()
for(pagecnt in 0:24){
res <- GET(paste0('https://rent.591.com.tw/home/search/rsList?is_new_list=1&type=1&kind=1&searchtype=1®ion=1§ion=5&firstRow=', pagecnt * 30 ,'&totalRows=711'))
house <- content(res)
print(pagecnt)
for( item in house$data$data){
df <- data.frame(address = item$address, price = item$price, area= item$area)
dfall <- rbind(dfall, df)
}
}
head(dfall)
str(dfall)
dfall$price <- as.integer(sub(pattern=',', replacement = '',dfall$price))
write.csv(x =dfall, file = 'rent_591_sample.csv')
rent<- read.csv('rent_591_sample.csv')
head(rent)
## X address price area
## 1 1 建國南路二段151巷大安森.. 50000 45
## 2 2 大安路一段‧正兩房‧電梯.. 25000 18
## 3 3 新生南路二段正面大安森林.. 200000 151
## 4 4 復興南路一段219巷忠孝復.. 48000 36
## 5 5 和平東路二段兩房一.. 25009 25
## 6 6 光復南路光復南路仁.. 22914 20
plot(price ~ area, data = rent)
fit <- lm(price ~ area, data = rent)
fit
##
## Call:
## lm(formula = price ~ area, data = rent)
##
## Coefficients:
## (Intercept) area
## 485.6 1484.5
abline(fit, col="red")
predict(fit, data.frame(area = 20))
## 1
## 30176.15
curve(dnorm(x), -5, 5, col="black")
house_prices <- read.csv('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/house-prices.csv')
head(house_prices)
## Home Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 1 114300 1790 2 2 2 No East
## 2 2 114200 2030 4 2 3 No East
## 3 3 114800 1740 3 2 1 No East
## 4 4 94700 1980 3 2 3 No East
## 5 5 119800 2130 3 3 3 No East
## 6 6 114600 1780 3 2 2 No North
plot(Price ~ SqFt, data = house_prices)
fit <- lm(Price ~ SqFt + Bedrooms + Bathrooms + Offers, data = house_prices)
summary(fit)
##
## Call:
## lm(formula = Price ~ SqFt + Bedrooms + Bathrooms + Offers, data = house_prices)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33608 -9889 -2968 9398 43243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17347.377 12724.896 -1.363 0.175
## SqFt 61.840 8.264 7.483 1.20e-11 ***
## Bedrooms 9319.753 2148.754 4.337 2.97e-05 ***
## Bathrooms 12646.347 3109.662 4.067 8.45e-05 ***
## Offers -13601.011 1324.819 -10.266 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15000 on 123 degrees of freedom
## Multiple R-squared: 0.6982, Adjusted R-squared: 0.6884
## F-statistic: 71.13 on 4 and 123 DF, p-value: < 2.2e-16
## lm
data("anscombe")
plot(y3 ~ x1, data = anscombe)
lmfit <- lm(y3 ~ x1, data = anscombe)
summary(lmfit)
##
## Call:
## lm(formula = y3 ~ x1, data = anscombe)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1586 -0.6146 -0.2303 0.1540 3.2411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0025 1.1245 2.670 0.02562 *
## x1 0.4997 0.1179 4.239 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.236 on 9 degrees of freedom
## Multiple R-squared: 0.6663, Adjusted R-squared: 0.6292
## F-statistic: 17.97 on 1 and 9 DF, p-value: 0.002176
abline(lmfit, coll ="red")
## Warning in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): "coll"
## 不是一個繪圖參數
predicted <- predict(lmfit, newsdata = anscombe[,x1])
predicted
## 1 2 3 4 5 6 7 8
## 7.999727 7.000273 9.498909 7.500000 8.499455 9.998636 6.000818 5.001364
## 9 10 11
## 8.999182 6.500545 5.501091
actual <- anscombe[, 'y3']
rmse <- (mean((actual - predicted)^ 2)) ^ 0.5
rmse
## [1] 1.118286
mu <- mean(actual)
rse <- mean((predicted - actual) ^ 2) / mean((mu - actual)^ 2)
rsquare <- 1 - rse
rsquare
## [1] 0.666324
model1 <- list(
rmse = rmse,
rse = rse,
rsquare = rsquare
)
model1
## $rmse
## [1] 1.118286
##
## $rse
## [1] 0.333676
##
## $rsquare
## [1] 0.666324
## rlm
data("anscombe")
plot(y3 ~ x1, data = anscombe)
rlmfit <- rlm(y3 ~ x1, data = anscombe)
abline(rlmfit, col="red")
predicted <- predict(rlmfit, newsdata = anscombe[,x1])
predicted
## 1 2 3 4 5 6 7 8
## 7.460722 6.769270 8.497899 7.114996 7.806448 8.843625 6.077819 5.386367
## 9 10 11
## 8.152173 6.423545 5.732093
actual <- anscombe[, 'y3']
rmse <- (mean((actual - predicted)^ 2)) ^ 0.5
rmse
## [1] 1.279045
mu <- mean(actual)
rse <- mean((predicted - actual) ^ 2) / mean((mu - actual)^ 2)
rsquare <- 1 - rse
rsquare
## [1] 0.5634933
model2 <- list(
rmse = rmse,
rse = rse,
rsquare = rsquare
)
model2
## $rmse
## [1] 1.279045
##
## $rse
## [1] 0.4365067
##
## $rsquare
## [1] 0.5634933
print('model1')
## [1] "model1"
print(model1)
## $rmse
## [1] 1.118286
##
## $rse
## [1] 0.333676
##
## $rsquare
## [1] 0.666324
print('model2')
## [1] "model2"
print(model2)
## $rmse
## [1] 1.279045
##
## $rse
## [1] 0.4365067
##
## $rsquare
## [1] 0.5634933
## lm
data("anscombe")
anscombe.subset <- anscombe[anscombe$x1 != 13, ]
plot(y3 ~ x1, data = anscombe.subset)
lmfit <- lm(y3 ~ x1, data = anscombe.subset)
summary(lmfit)
##
## Call:
## lm(formula = y3 ~ x1, data = anscombe.subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0041558 -0.0022240 0.0000649 0.0018182 0.0050649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.0056494 0.0029242 1370 <2e-16 ***
## x1 0.3453896 0.0003206 1077 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003082 on 8 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.161e+06 on 1 and 8 DF, p-value: < 2.2e-16
abline(lmfit, coll ="red")
## Warning in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): "coll"
## 不是一個繪圖參數
predicted <- predict(lmfit, newsdata = anscombe.subset[,x1])
predicted
## 1 2 4 5 6 7 8 9
## 7.459545 6.768766 7.114156 7.804935 8.841104 6.077987 5.387208 8.150325
## 10 11
## 6.423377 5.732597
actual <- anscombe.subset[, 'y3']
rmse <- (mean((actual - predicted)^ 2)) ^ 0.5
rmse
## [1] 0.002756339
mu <- mean(actual)
rse <- mean((predicted - actual) ^ 2) / mean((mu - actual)^ 2)
rsquare <- 1 - rse
rsquare
## [1] 0.9999931
house_prices <- read.csv('https://raw.githubusercontent.com/ywchiu/fubonr/master/data/house-prices.csv')
head(house_prices)
## Home Price SqFt Bedrooms Bathrooms Offers Brick Neighborhood
## 1 1 114300 1790 2 2 2 No East
## 2 2 114200 2030 4 2 3 No East
## 3 3 114800 1740 3 2 1 No East
## 4 4 94700 1980 3 2 3 No East
## 5 5 119800 2130 3 3 3 No East
## 6 6 114600 1780 3 2 2 No North
#house_prices$Brick
house_prices$brick_d <- ifelse(house_prices$Brick == 'Yes', 1, 0)
house_prices$east <- ifelse(house_prices$Neighborhood == 'East', 1, 0)
house_prices$north <- ifelse(house_prices$Neighborhood == 'North', 1, 0)
house_prices$Brick <- NULL
house_prices$Neighborhood <- NULL
head(house_prices)
## Home Price SqFt Bedrooms Bathrooms Offers brick_d east north
## 1 1 114300 1790 2 2 2 0 1 0
## 2 2 114200 2030 4 2 3 0 1 0
## 3 3 114800 1740 3 2 1 0 1 0
## 4 4 94700 1980 3 2 3 0 1 0
## 5 5 119800 2130 3 3 3 0 1 0
## 6 6 114600 1780 3 2 2 0 0 1
set.seed(110)
idx <- sample.int(2, nrow(house_prices), replace = TRUE, prob = c(0.6,0.4))
training_data <- house_prices[idx == 1, ]
validation_data <- house_prices[idx == 2, ]
lm.fit1 <- lm(Price ~ SqFt + Bathrooms + Bedrooms + Offers + north + east+ brick_d, data = training_data)
summary(lm.fit1)
##
## Call:
## lm(formula = Price ~ SqFt + Bathrooms + Bedrooms + Offers + north +
## east + brick_d, data = training_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29684.9 -7243.2 131.9 5442.4 26997.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16196.683 15352.916 1.055 0.29512
## SqFt 50.525 8.671 5.827 1.64e-07 ***
## Bathrooms 11323.358 2791.959 4.056 0.00013 ***
## Bedrooms 5279.107 2207.022 2.392 0.01949 *
## Offers -8172.431 1424.182 -5.738 2.34e-07 ***
## north -20575.043 4111.119 -5.005 4.10e-06 ***
## east -23222.284 3507.448 -6.621 6.43e-09 ***
## brick_d 15089.593 2618.395 5.763 2.12e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10470 on 69 degrees of freedom
## Multiple R-squared: 0.8412, Adjusted R-squared: 0.8251
## F-statistic: 52.21 on 7 and 69 DF, p-value: < 2.2e-16
nrow(training_data)
## [1] 77
lm.fit1.step <- stepAIC(lm.fit1)
## Start: AIC=1433.07
## Price ~ SqFt + Bathrooms + Bedrooms + Offers + north + east +
## brick_d
##
## Df Sum of Sq RSS AIC
## <none> 7.5689e+09 1433.1
## - Bedrooms 1 627616327 8.1966e+09 1437.2
## - Bathrooms 1 1804342905 9.3733e+09 1447.5
## - north 1 2747564839 1.0317e+10 1454.9
## - Offers 1 3612086579 1.1181e+10 1461.1
## - brick_d 1 3643102656 1.1212e+10 1461.3
## - SqFt 1 3724914639 1.1294e+10 1461.9
## - east 1 4808548343 1.2377e+10 1468.9
summary(lm.fit1.step)
##
## Call:
## lm(formula = Price ~ SqFt + Bathrooms + Bedrooms + Offers + north +
## east + brick_d, data = training_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29684.9 -7243.2 131.9 5442.4 26997.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16196.683 15352.916 1.055 0.29512
## SqFt 50.525 8.671 5.827 1.64e-07 ***
## Bathrooms 11323.358 2791.959 4.056 0.00013 ***
## Bedrooms 5279.107 2207.022 2.392 0.01949 *
## Offers -8172.431 1424.182 -5.738 2.34e-07 ***
## north -20575.043 4111.119 -5.005 4.10e-06 ***
## east -23222.284 3507.448 -6.621 6.43e-09 ***
## brick_d 15089.593 2618.395 5.763 2.12e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10470 on 69 degrees of freedom
## Multiple R-squared: 0.8412, Adjusted R-squared: 0.8251
## F-statistic: 52.21 on 7 and 69 DF, p-value: < 2.2e-16
training_data$predict.price <- predict(lm.fit1)
training_data$error <- residuals(lm.fit1)
head(training_data)
## Home Price SqFt Bedrooms Bathrooms Offers brick_d east north
## 2 2 114200 2030 4 2 3 0 1 0
## 6 6 114600 1780 3 2 2 0 0 1
## 10 10 104000 1730 3 3 3 0 1 0
## 12 12 123000 1870 2 2 2 1 1 0
## 13 13 102600 1910 3 2 4 0 0 1
## 16 16 145800 1780 4 2 1 0 0 0
## predict.price error
## 2 114786.86 -586.8556
## 6 107696.07 6903.9343
## 10 105673.48 -1673.4802
## 12 119406.60 3593.4025
## 13 97919.51 4680.4921
## 16 141722.65 4077.3531
hist(training_data$error)
validation_data$predict.price <- predict(lm.fit1,
newdata = validation_data)
validation_data$error <- validation_data$predict.price - validation_data$Price
hist(validation_data$error)
## 驗證在591 上
library(httr)
dfall <- data.frame()
for(pagecnt in 0:24){
res <- GET(paste0('https://rent.591.com.tw/home/search/rsList?is_new_list=1&type=1&kind=1&searchtype=1®ion=1§ion=5&firstRow=', pagecnt * 30 ,'&totalRows=711'))
house <- content(res)
print(pagecnt)
for( item in house$data$data){
if (!is.null(item$layout)){
df <- data.frame(address = item$address,
price = item$price,
area = item$area,
layout = item$layout,
floor = item$floor,
allfloor = item$allfloor
)
dfall <- rbind(dfall, df)
}
}
}
head(dfall)
str(dfall)
head(dfall)
dfall$price <- as.integer(sub(pattern=',', replacement = '',dfall$price))
dfall$bedroom <- gsub(pattern='(\\d+)房(\\d+)廳(\\d+)衛', '\\1', dfall$layout)
dfall$livingroom <- gsub(pattern='(\\d+)房(\\d+)廳(\\d+)衛', '\\2', dfall$layout)
dfall$bathroom <- gsub(pattern='(\\d+)房(\\d+)廳(\\d+)衛', '\\3', dfall$layout)
dfall$bedroom <- as.integer(dfall$bedroom)
dfall$bathroom <- as.integer(dfall$bathroom)
dfall$livingroom <- as.integer(dfall$livingroom)
dfall <- dfall[dfall$floor < 50, ]
dfall <- dfall[dfall$bathroom != '開放式格局',]
dfall$layout <- NULL
write.csv(x =dfall, file = 'rent_591_regression.csv')
rent <- read.csv('rent_591_regression.csv')
head(rent)
## X address price area floor allfloor livingroom
## 1 1 新生南路二段正面大安森林.. 200000 151 2 15 2
## 2 2 復興南路一段219巷忠孝復.. 48000 36 2 7 2
## 3 3 和平東路二段兩房一.. 25009 25 2 4 1
## 4 4 光復南路光復南路仁.. 22914 20 7 12 1
## 5 5 樂業街2房1廳基隆.. 19906 20 2 4 1
## 6 7 信義路三段台北大安.. 36000 14 12 14 2
## bathroom bedroom
## 1 4 4
## 2 1 3
## 3 1 2
## 4 1 2
## 5 1 2
## 6 1 1
fit <- lm(price ~ area + floor + allfloor + livingroom + bathroom + bedroom, data = rent)
summary(fit)
##
## Call:
## lm(formula = price ~ area + floor + allfloor + livingroom + bathroom +
## bedroom, data = rent)
##
## Residuals:
## Min 1Q Median 3Q Max
## -69228 -10669 -1390 7854 107682
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 866.00 3169.59 0.273 0.78476
## area 1476.74 53.13 27.795 < 2e-16 ***
## floor 293.55 225.26 1.303 0.19295
## allfloor 221.61 198.33 1.117 0.26421
## livingroom 6264.27 1896.61 3.303 0.00101 **
## bathroom 272.16 1800.47 0.151 0.87989
## bedroom -6038.56 1079.81 -5.592 3.21e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19160 on 702 degrees of freedom
## Multiple R-squared: 0.7398, Adjusted R-squared: 0.7376
## F-statistic: 332.7 on 6 and 702 DF, p-value: < 2.2e-16
# for stepAIC
library(MASS)
fit.step1 <- stepAIC(fit)
## Start: AIC=13988.97
## price ~ area + floor + allfloor + livingroom + bathroom + bedroom
##
## Df Sum of Sq RSS AIC
## - bathroom 1 8.3844e+06 2.5761e+11 13987
## - allfloor 1 4.5816e+08 2.5806e+11 13988
## - floor 1 6.2314e+08 2.5822e+11 13989
## <none> 2.5760e+11 13989
## - livingroom 1 4.0030e+09 2.6160e+11 13998
## - bedroom 1 1.1476e+10 2.6907e+11 14018
## - area 1 2.8350e+11 5.4109e+11 14513
##
## Step: AIC=13986.99
## price ~ area + floor + allfloor + livingroom + bedroom
##
## Df Sum of Sq RSS AIC
## - allfloor 1 4.5560e+08 2.5806e+11 13986
## - floor 1 6.3527e+08 2.5824e+11 13987
## <none> 2.5761e+11 13987
## - livingroom 1 4.3908e+09 2.6200e+11 13997
## - bedroom 1 1.2710e+10 2.7032e+11 14019
## - area 1 3.8118e+11 6.3878e+11 14629
##
## Step: AIC=13986.24
## price ~ area + floor + livingroom + bedroom
##
## Df Sum of Sq RSS AIC
## <none> 2.5806e+11 13986
## - floor 1 2.3614e+09 2.6042e+11 13991
## - livingroom 1 4.1510e+09 2.6221e+11 13996
## - bedroom 1 1.2793e+10 2.7086e+11 14018
## - area 1 4.0328e+11 6.6135e+11 14652
# Withohout MASS
fit.step <- step(fit)
## Start: AIC=13988.97
## price ~ area + floor + allfloor + livingroom + bathroom + bedroom
##
## Df Sum of Sq RSS AIC
## - bathroom 1 8.3844e+06 2.5761e+11 13987
## - allfloor 1 4.5816e+08 2.5806e+11 13988
## - floor 1 6.2314e+08 2.5822e+11 13989
## <none> 2.5760e+11 13989
## - livingroom 1 4.0030e+09 2.6160e+11 13998
## - bedroom 1 1.1476e+10 2.6907e+11 14018
## - area 1 2.8350e+11 5.4109e+11 14513
##
## Step: AIC=13986.99
## price ~ area + floor + allfloor + livingroom + bedroom
##
## Df Sum of Sq RSS AIC
## - allfloor 1 4.5560e+08 2.5806e+11 13986
## - floor 1 6.3527e+08 2.5824e+11 13987
## <none> 2.5761e+11 13987
## - livingroom 1 4.3908e+09 2.6200e+11 13997
## - bedroom 1 1.2710e+10 2.7032e+11 14019
## - area 1 3.8118e+11 6.3878e+11 14629
##
## Step: AIC=13986.24
## price ~ area + floor + livingroom + bedroom
##
## Df Sum of Sq RSS AIC
## <none> 2.5806e+11 13986
## - floor 1 2.3614e+09 2.6042e+11 13991
## - livingroom 1 4.1510e+09 2.6221e+11 13996
## - bedroom 1 1.2793e+10 2.7086e+11 14018
## - area 1 4.0328e+11 6.6135e+11 14652
summary(fit.step)
##
## Call:
## lm(formula = price ~ area + floor + livingroom + bedroom, data = rent)
##
## Residuals:
## Min 1Q Median 3Q Max
## -69514 -10387 -1593 8442 107176
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2297.60 2900.99 0.792 0.428625
## area 1491.20 44.96 33.169 < 2e-16 ***
## floor 449.74 177.20 2.538 0.011361 *
## livingroom 6130.93 1821.90 3.365 0.000807 ***
## bedroom -6002.33 1016.02 -5.908 5.4e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19150 on 704 degrees of freedom
## Multiple R-squared: 0.7394, Adjusted R-squared: 0.7379
## F-statistic: 499.3 on 4 and 704 DF, p-value: < 2.2e-16
predict(fit.step, data.frame(area = 20, floor= 3, livingroom = 1, bedroom= 2))
## 1
## 27597.11
?glm
## starting httpd help server ... done
rent <- read.csv('rent_591_regression.csv')
fit <- glm(price ~ area + floor + allfloor + livingroom + bathroom + bedroom, data = rent, family = 'gaussian')
summary(fit)
##
## Call:
## glm(formula = price ~ area + floor + allfloor + livingroom +
## bathroom + bedroom, family = "gaussian", data = rent)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -69228 -10669 -1390 7854 107682
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 866.00 3169.59 0.273 0.78476
## area 1476.74 53.13 27.795 < 2e-16 ***
## floor 293.55 225.26 1.303 0.19295
## allfloor 221.61 198.33 1.117 0.26421
## livingroom 6264.27 1896.61 3.303 0.00101 **
## bathroom 272.16 1800.47 0.151 0.87989
## bedroom -6038.56 1079.81 -5.592 3.21e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 366949358)
##
## Null deviance: 9.9015e+11 on 708 degrees of freedom
## Residual deviance: 2.5760e+11 on 702 degrees of freedom
## AIC: 16003
##
## Number of Fisher Scoring iterations: 2
fit.step <- step(fit)
## Start: AIC=16003.02
## price ~ area + floor + allfloor + livingroom + bathroom + bedroom
##
## Df Deviance AIC
## - bathroom 1 2.5761e+11 16001
## - allfloor 1 2.5806e+11 16002
## - floor 1 2.5822e+11 16003
## <none> 2.5760e+11 16003
## - livingroom 1 2.6160e+11 16012
## - bedroom 1 2.6907e+11 16032
## - area 1 5.4109e+11 16527
##
## Step: AIC=16001.04
## price ~ area + floor + allfloor + livingroom + bedroom
##
## Df Deviance AIC
## - allfloor 1 2.5806e+11 16000
## - floor 1 2.5824e+11 16001
## <none> 2.5761e+11 16001
## - livingroom 1 2.6200e+11 16011
## - bedroom 1 2.7032e+11 16033
## - area 1 6.3878e+11 16643
##
## Step: AIC=16000.3
## price ~ area + floor + livingroom + bedroom
##
## Df Deviance AIC
## <none> 2.5806e+11 16000
## - floor 1 2.6042e+11 16005
## - livingroom 1 2.6221e+11 16010
## - bedroom 1 2.7086e+11 16033
## - area 1 6.6135e+11 16666
data(iris)
dataset <- iris[51:150, ]
dataset$Species <- factor(dataset$Species)
fit <- glm(Species ~., data = dataset, family = binomial(link = "logit"))
str(dataset)
## 'data.frame': 100 obs. of 5 variables:
## $ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 4.9 6.6 5.2 ...
## $ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 3.3 2.4 2.9 2.7 ...
## $ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 3.3 4.6 3.9 ...
## $ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 1.6 1 1.3 1.4 ...
## $ Species : Factor w/ 2 levels "versicolor","virginica": 1 1 1 1 1 1 1 1 1 1 ...
summary(fit)
##
## Call:
## glm(formula = Species ~ ., family = binomial(link = "logit"),
## data = dataset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.01105 -0.00541 -0.00001 0.00677 1.78065
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -42.638 25.707 -1.659 0.0972 .
## Sepal.Length -2.465 2.394 -1.030 0.3032
## Sepal.Width -6.681 4.480 -1.491 0.1359
## Petal.Length 9.429 4.737 1.991 0.0465 *
## Petal.Width 18.286 9.743 1.877 0.0605 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 138.629 on 99 degrees of freedom
## Residual deviance: 11.899 on 95 degrees of freedom
## AIC: 21.899
##
## Number of Fisher Scoring iterations: 10
fit.step <- step(fit)
## Start: AIC=21.9
## Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - Sepal.Length 1 13.266 21.266
## <none> 11.899 21.899
## - Sepal.Width 1 15.492 23.492
## - Petal.Width 1 23.772 31.772
## - Petal.Length 1 25.902 33.902
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=21.27
## Species ~ Sepal.Width + Petal.Length + Petal.Width
##
## Df Deviance AIC
## <none> 13.266 21.266
## - Sepal.Width 1 20.564 26.564
## - Petal.Length 1 27.399 33.399
## - Petal.Width 1 31.512 37.512