You should complete the following problems in R. Be sure to provide the instructor with both your code and output.
homes <- read.csv("C:/Users/raze1/OneDrive/Desktop/UIndy/MSDA 622/Homework/Homework 5/homes.csv")
homes
## Price Floor Lot Bath Bed BathBed Year Age AgeSq Gar Status DAc School DEd
## 1 388.0 2.180 4 3.0 4 12.0 1940 -3.0 9.00 0 Sold 0 Edison 1
## 2 450.0 2.054 5 3.0 4 12.0 1957 -1.3 1.69 2 Sold 0 Edison 1
## 3 386.0 2.112 5 2.0 4 8.0 1955 -1.5 2.25 2 Sold 0 Edison 1
## 4 350.0 1.442 6 1.0 2 2.0 1956 -1.4 1.96 1 Active 1 Adams 0
## 5 155.5 1.800 1 2.0 4 8.0 1994 2.4 5.76 1 Sold 0 Adams 0
## 6 220.0 1.965 5 2.0 3 6.0 1940 -3.0 9.00 1 Sold 0 Adams 0
## 7 239.5 1.800 4 1.1 4 4.4 1958 -1.2 1.44 1 Active 1 Parker 0
## 8 207.0 2.254 4 2.0 4 8.0 1961 -0.9 0.81 2 Sold 0 Parker 0
## 9 269.9 1.922 4 2.1 4 8.4 1965 -0.5 0.25 2 Active 1 Parker 0
## 10 238.8 1.920 5 2.1 3 6.3 1968 -0.2 0.04 2 Sold 0 Parker 0
## 11 359.9 2.200 5 2.0 3 6.0 1970 0.0 0.00 2 Active 1 Redwood 0
## 12 249.7 1.868 4 2.0 4 8.0 1965 -0.5 0.25 2 Sold 0 Redwood 0
## 13 265.0 1.875 4 2.1 3 6.3 1979 0.9 0.81 2 Sold 0 Redwood 0
## 14 349.0 2.000 5 2.0 3 6.0 1997 2.7 7.29 2 Pending 0 Redwood 0
## 15 319.0 1.855 4 2.0 4 8.0 1925 -4.5 20.25 2 Sold 0 Harris 0
## 16 339.0 1.928 5 3.0 3 9.0 1972 0.2 0.04 2 Active 1 Redwood 0
## 17 283.0 1.980 4 3.0 4 12.0 1971 0.1 0.01 2 Active 1 Redwood 0
## 18 275.0 1.528 3 2.1 3 6.3 1975 0.5 0.25 2 Active 1 Redwood 0
## 19 299.9 1.882 3 2.1 3 6.3 1976 0.6 0.36 2 Active 1 Redwood 0
## 20 277.0 1.440 3 2.0 3 6.0 1948 -2.2 4.84 2 Active 1 Edison 1
## 21 299.0 1.994 8 3.0 2 6.0 2005 3.5 12.25 3 Pending 0 Parker 0
## 22 185.0 1.860 4 2.0 5 10.0 1953 -1.7 2.89 0 Sold 0 Parker 0
## 23 232.0 2.031 4 2.0 4 8.0 1950 -2.0 4.00 0 Sold 0 Parker 0
## 24 214.9 1.868 5 2.0 4 8.0 1966 -0.4 0.16 2 Sold 0 Redwood 0
## 25 195.0 2.112 4 2.0 3 6.0 1966 -0.4 0.16 2 Sold 0 Redwood 0
## 26 256.0 1.974 4 2.0 3 6.0 1961 -0.9 0.81 2 Sold 0 Harris 0
## 27 269.7 2.100 4 2.0 4 8.0 1969 -0.1 0.01 2 Active 1 Parker 0
## 28 330.0 1.894 2 2.0 3 6.0 1950 -2.0 4.00 1 Sold 0 Edison 1
## 29 249.9 1.804 5 2.0 3 6.0 1960 -1.0 1.00 2 Pending 0 Redwood 0
## 30 279.9 2.010 5 2.0 3 6.0 1969 -0.1 0.01 2 Pending 0 Edison 1
## 31 325.0 1.661 4 1.1 3 3.3 1974 0.4 0.16 2 Active 1 Crest 0
## 32 259.9 1.708 4 2.0 4 8.0 1937 -3.3 10.89 0 Active 1 Crest 0
## 33 324.5 1.800 2 2.1 3 6.3 2000 3.0 9.00 1 Pending 0 Harris 0
## 34 359.9 1.968 3 3.0 3 9.0 1980 1.0 1.00 2 Active 1 Harris 0
## 35 252.5 1.888 2 2.0 6 12.0 1920 -5.0 25.00 0 Sold 0 Edison 1
## 36 269.0 1.800 3 3.0 5 15.0 1947 -2.3 5.29 0 Sold 0 Redwood 0
## 37 235.0 1.712 5 1.0 3 3.0 1948 -2.2 4.84 1 Sold 0 Edison 1
## 38 256.5 2.200 4 2.1 3 6.3 1966 -0.4 0.16 2 Sold 0 Redwood 0
## 39 319.9 1.920 7 2.1 3 6.3 2004 3.4 11.56 2 Active 1 Parker 0
## 40 355.0 2.129 5 2.0 3 6.0 2003 3.3 10.89 2 Pending 0 Parker 0
## 41 285.0 2.235 7 3.1 4 12.4 1975 0.5 0.25 2 Active 1 Crest 0
## 42 242.0 2.279 5 2.1 4 8.4 1949 -2.1 4.41 1 Sold 0 Parker 0
## 43 197.0 1.850 3 2.0 4 8.0 1964 -0.6 0.36 1 Sold 0 Redwood 0
## 44 281.8 1.886 4 2.0 3 6.0 2005 3.5 12.25 2 Sold 0 Edison 1
## 45 259.0 2.080 2 2.0 4 8.0 1969 -0.1 0.01 2 Pending 0 Redwood 0
## 46 189.5 1.906 3 2.1 3 6.3 1975 0.5 0.25 0 Sold 0 Redwood 0
## 47 339.9 2.048 5 2.0 2 4.0 1993 2.3 5.29 3 Pending 0 Redwood 0
## 48 297.0 2.106 4 2.1 3 6.3 1995 2.5 6.25 2 Sold 0 Redwood 0
## 49 295.0 2.028 4 3.0 5 15.0 1992 2.2 4.84 2 Sold 0 Redwood 0
## 50 222.5 1.794 4 2.0 3 6.0 1960 -1.0 1.00 0 Sold 0 Crest 0
## 51 199.0 2.016 3 2.0 5 10.0 1963 -0.7 0.49 1 Pending 0 Redwood 0
## 52 385.5 1.904 4 1.1 3 3.3 1919 -5.1 26.01 1 Sold 0 Edison 1
## 53 230.0 2.234 3 3.0 4 12.0 1972 0.2 0.04 2 Sold 0 Redwood 0
## 54 285.0 2.269 3 3.1 4 12.4 1905 -6.5 42.25 0 Sold 0 Edison 1
## 55 243.0 1.810 4 3.0 3 9.0 1961 -0.9 0.81 2 Sold 0 Redwood 0
## 56 217.0 1.875 4 2.0 3 6.0 1961 -0.9 0.81 0 Sold 0 Redwood 0
## 57 259.9 1.683 5 2.1 3 6.3 1979 0.9 0.81 1 Active 1 Harris 0
## 58 349.5 2.080 4 2.1 3 6.3 2005 3.5 12.25 2 Active 1 Harris 0
## 59 345.0 2.126 1 3.1 3 9.3 2005 3.5 12.25 2 Active 1 Harris 0
## 60 336.0 2.126 1 3.1 3 9.3 2005 3.5 12.25 2 Pending 0 Harris 0
## 61 333.8 2.126 1 3.1 3 9.3 2005 3.5 12.25 2 Active 1 Harris 0
## 62 340.0 2.126 1 3.1 3 9.3 2005 3.5 12.25 2 Sold 0 Harris 0
## 63 345.0 2.126 1 3.1 3 9.3 2005 3.5 12.25 2 Sold 0 Harris 0
## 64 374.5 1.712 5 1.1 3 3.3 1908 -6.2 38.44 2 Pending 0 Edison 1
## 65 236.5 1.950 4 3.0 4 12.0 1966 -0.4 0.16 2 Pending 0 Redwood 0
## 66 350.0 2.020 7 2.0 3 6.0 1976 0.6 0.36 2 Pending 0 Harris 0
## 67 270.0 2.053 3 2.1 3 6.3 1977 0.7 0.49 2 Active 1 Redwood 0
## 68 299.0 1.743 3 2.0 3 6.0 1988 1.8 3.24 2 Active 1 Harris 0
## 69 285.0 1.878 5 2.1 3 6.3 1975 0.5 0.25 2 Active 1 Crest 0
## 70 255.0 1.900 4 2.0 3 6.0 1980 1.0 1.00 2 Sold 0 Parker 0
## 71 259.0 2.208 4 3.0 3 9.0 1979 0.9 0.81 2 Active 1 Parker 0
## 72 249.9 2.081 5 2.1 4 8.4 1970 0.0 0.00 1 Sold 0 Harris 0
## 73 215.0 2.044 1 1.1 4 4.4 1936 -3.4 11.56 0 Sold 0 Parker 0
## 74 435.0 2.253 11 2.0 3 6.0 1979 0.9 0.81 2 Sold 0 Redwood 0
## 75 274.9 1.861 4 2.0 4 8.0 1995 2.5 6.25 2 Active 1 Parker 0
## 76 349.5 2.896 4 3.0 5 15.0 1979 0.9 0.81 2 Active 1 Crest 0
## DHa DAd DCr DPa
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 1 0 0
## 5 0 1 0 0
## 6 0 1 0 0
## 7 0 0 0 1
## 8 0 0 0 1
## 9 0 0 0 1
## 10 0 0 0 1
## 11 0 0 0 0
## 12 0 0 0 0
## 13 0 0 0 0
## 14 0 0 0 0
## 15 1 0 0 0
## 16 0 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 1
## 22 0 0 0 1
## 23 0 0 0 1
## 24 0 0 0 0
## 25 0 0 0 0
## 26 1 0 0 0
## 27 0 0 0 1
## 28 0 0 0 0
## 29 0 0 0 0
## 30 0 0 0 0
## 31 0 0 1 0
## 32 0 0 1 0
## 33 1 0 0 0
## 34 1 0 0 0
## 35 0 0 0 0
## 36 0 0 0 0
## 37 0 0 0 0
## 38 0 0 0 0
## 39 0 0 0 1
## 40 0 0 0 1
## 41 0 0 1 0
## 42 0 0 0 1
## 43 0 0 0 0
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 0 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
## 50 0 0 1 0
## 51 0 0 0 0
## 52 0 0 0 0
## 53 0 0 0 0
## 54 0 0 0 0
## 55 0 0 0 0
## 56 0 0 0 0
## 57 1 0 0 0
## 58 1 0 0 0
## 59 1 0 0 0
## 60 1 0 0 0
## 61 1 0 0 0
## 62 1 0 0 0
## 63 1 0 0 0
## 64 0 0 0 0
## 65 0 0 0 0
## 66 1 0 0 0
## 67 0 0 0 0
## 68 1 0 0 0
## 69 0 0 1 0
## 70 0 0 0 1
## 71 0 0 0 1
## 72 1 0 0 0
## 73 0 0 0 1
## 74 0 0 0 0
## 75 0 0 0 1
## 76 0 0 1 0
head(homes)
## Price Floor Lot Bath Bed BathBed Year Age AgeSq Gar Status DAc School DEd
## 1 388.0 2.180 4 3 4 12 1940 -3.0 9.00 0 Sold 0 Edison 1
## 2 450.0 2.054 5 3 4 12 1957 -1.3 1.69 2 Sold 0 Edison 1
## 3 386.0 2.112 5 2 4 8 1955 -1.5 2.25 2 Sold 0 Edison 1
## 4 350.0 1.442 6 1 2 2 1956 -1.4 1.96 1 Active 1 Adams 0
## 5 155.5 1.800 1 2 4 8 1994 2.4 5.76 1 Sold 0 Adams 0
## 6 220.0 1.965 5 2 3 6 1940 -3.0 9.00 1 Sold 0 Adams 0
## DHa DAd DCr DPa
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 1 0 0
## 5 0 1 0 0
## 6 0 1 0 0
summary(homes)
## Price Floor Lot Bath
## Min. :155.5 Min. :1.440 Min. : 1.000 Min. :1.000
## 1st Qu.:242.8 1st Qu.:1.861 1st Qu.: 3.000 1st Qu.:2.000
## Median :276.0 Median :1.966 Median : 4.000 Median :2.000
## Mean :285.8 Mean :1.970 Mean : 3.987 Mean :2.208
## 3rd Qu.:336.8 3rd Qu.:2.107 3rd Qu.: 5.000 3rd Qu.:3.000
## Max. :450.0 Max. :2.896 Max. :11.000 Max. :3.100
## Bed BathBed Year Age
## Min. :2.000 Min. : 2.000 Min. :1905 Min. :-6.50000
## 1st Qu.:3.000 1st Qu.: 6.000 1st Qu.:1958 1st Qu.:-1.22500
## Median :3.000 Median : 6.300 Median :1970 Median :-0.05000
## Mean :3.447 Mean : 7.672 Mean :1969 Mean :-0.05921
## 3rd Qu.:4.000 3rd Qu.: 9.000 3rd Qu.:1980 3rd Qu.: 1.00000
## Max. :6.000 Max. :15.000 Max. :2005 Max. : 3.50000
## AgeSq Gar Status DAc
## Min. : 0.00 Min. :0.000 Length:76 Min. :0.0000
## 1st Qu.: 0.25 1st Qu.:1.000 Class :character 1st Qu.:0.0000
## Median : 1.22 Median :2.000 Mode :character Median :0.0000
## Mean : 5.45 Mean :1.566 Mean :0.3289
## 3rd Qu.: 9.00 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :42.25 Max. :3.000 Max. :1.0000
## School DEd DHa DAd
## Length:76 Min. :0.0000 Min. :0.0000 Min. :0.00000
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Mode :character Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1579 Mean :0.1842 Mean :0.03947
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000
## DCr DPa
## Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000
## Mean :0.07895 Mean :0.1974
## 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000
homes2 <- subset(homes, select = -c(Status, School))
summary(homes2)
## Price Floor Lot Bath
## Min. :155.5 Min. :1.440 Min. : 1.000 Min. :1.000
## 1st Qu.:242.8 1st Qu.:1.861 1st Qu.: 3.000 1st Qu.:2.000
## Median :276.0 Median :1.966 Median : 4.000 Median :2.000
## Mean :285.8 Mean :1.970 Mean : 3.987 Mean :2.208
## 3rd Qu.:336.8 3rd Qu.:2.107 3rd Qu.: 5.000 3rd Qu.:3.000
## Max. :450.0 Max. :2.896 Max. :11.000 Max. :3.100
## Bed BathBed Year Age
## Min. :2.000 Min. : 2.000 Min. :1905 Min. :-6.50000
## 1st Qu.:3.000 1st Qu.: 6.000 1st Qu.:1958 1st Qu.:-1.22500
## Median :3.000 Median : 6.300 Median :1970 Median :-0.05000
## Mean :3.447 Mean : 7.672 Mean :1969 Mean :-0.05921
## 3rd Qu.:4.000 3rd Qu.: 9.000 3rd Qu.:1980 3rd Qu.: 1.00000
## Max. :6.000 Max. :15.000 Max. :2005 Max. : 3.50000
## AgeSq Gar DAc DEd
## Min. : 0.00 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.25 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 1.22 Median :2.000 Median :0.0000 Median :0.0000
## Mean : 5.45 Mean :1.566 Mean :0.3289 Mean :0.1579
## 3rd Qu.: 9.00 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :42.25 Max. :3.000 Max. :1.0000 Max. :1.0000
## DHa DAd DCr DPa
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.1842 Mean :0.03947 Mean :0.07895 Mean :0.1974
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
maxs <- apply(homes2, 2, max)
mins <- apply(homes2, 2, min)
maxs
## Price Floor Lot Bath Bed BathBed Year Age
## 450.000 2.896 11.000 3.100 6.000 15.000 2005.000 3.500
## AgeSq Gar DAc DEd DHa DAd DCr DPa
## 42.250 3.000 1.000 1.000 1.000 1.000 1.000 1.000
mins
## Price Floor Lot Bath Bed BathBed Year Age AgeSq Gar
## 155.50 1.44 1.00 1.00 2.00 2.00 1905.00 -6.50 0.00 0.00
## DAc DEd DHa DAd DCr DPa
## 0.00 0.00 0.00 0.00 0.00 0.00
scaled <- as.data.frame(scale(homes2, center = mins, scale = maxs - mins))
summary(scaled)
## Price Floor Lot Bath
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2963 1st Qu.:0.2890 1st Qu.:0.2000 1st Qu.:0.4762
## Median :0.4092 Median :0.3616 Median :0.3000 Median :0.4762
## Mean :0.4424 Mean :0.3643 Mean :0.2987 Mean :0.5752
## 3rd Qu.:0.6154 3rd Qu.:0.4584 3rd Qu.:0.4000 3rd Qu.:0.9524
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Bed BathBed Year Age
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2500 1st Qu.:0.3077 1st Qu.:0.5275 1st Qu.:0.5275
## Median :0.2500 Median :0.3308 Median :0.6450 Median :0.6450
## Mean :0.3618 Mean :0.4363 Mean :0.6441 Mean :0.6441
## 3rd Qu.:0.5000 3rd Qu.:0.5385 3rd Qu.:0.7500 3rd Qu.:0.7500
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## AgeSq Gar DAc DEd
## Min. :0.000000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.005917 1st Qu.:0.3333 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.028876 Median :0.6667 Median :0.0000 Median :0.0000
## Mean :0.128991 Mean :0.5219 Mean :0.3289 Mean :0.1579
## 3rd Qu.:0.213018 3rd Qu.:0.6667 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.000000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## DHa DAd DCr DPa
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.1842 Mean :0.03947 Mean :0.07895 Mean :0.1974
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
index <- sample(nrow(homes2), nrow(homes2)*0.85)
train_homes2 <- scaled[index,]
test_homes2 <- scaled[-index,]
library(neuralnet)
## Warning: package 'neuralnet' was built under R version 4.2.3
nn <- neuralnet(Price ~ ., data=train_homes2, hidden=c(2,3,2), linear.output=T)
nn$act.fct
## function (x)
## {
## 1/(1 + exp(-x))
## }
## <bytecode: 0x000001e3fe8f80b0>
## <environment: 0x000001e3fe905728>
## attr(,"type")
## [1] "logistic"
plot(nn)
# Standardized predictions on the testing set
pr_nn <- compute(nn, test_homes2[,2:16])
# Predicted value back to original scale
pr_nn_org <- pr_nn$net.result*(max(homes2$Price) - min(homes2$Price)) + min(homes2$Price)
pr_nn_org
## [,1]
## 2 391.6345
## 9 269.5528
## 18 321.0098
## 23 197.9864
## 37 267.0494
## 40 274.5402
## 47 414.0798
## 52 321.4810
## 56 197.4806
## 61 369.7251
## 66 290.8280
## 71 289.2237
# Target in the testing set converted back to its original scale
test_r <- (test_homes2$Price)*(max(homes2$Price) - min(homes2$Price)) + min(homes2$Price)
# MSE of testing set
MSPE_nn <- sum((test_r - pr_nn_org)^2)/nrow(test_homes2)
MSPE_nn
## [1] 2489.076
train <- read.csv("C:/Users/raze1/OneDrive/Desktop/UIndy/MSDA 622/Homework/Homework 5/train.csv")
test <- read.csv("C:/Users/raze1/OneDrive/Desktop/UIndy/MSDA 622/Homework/Homework 5/test.csv")
head(train)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 4.22239 0 18.10 1 0.770 5.803 89.0 1.9047 24 666 20.2 353.04 14.64
## 2 6.28807 0 18.10 0 0.740 6.341 96.4 2.0720 24 666 20.2 318.01 17.79
## 3 11.81230 0 18.10 0 0.718 6.824 76.5 1.7940 24 666 20.2 48.45 22.74
## 4 0.55778 0 21.89 0 0.624 6.335 98.2 2.1107 4 437 21.2 394.67 16.96
## 5 0.06162 0 4.39 0 0.442 5.898 52.3 8.0136 3 352 18.8 364.61 12.67
## 6 0.08265 0 13.92 0 0.437 6.127 18.4 5.5027 4 289 16.0 396.90 8.58
## medv
## 1 16.8
## 2 14.9
## 3 8.4
## 4 18.1
## 5 17.2
## 6 23.9
head(test)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 2 0.75026 0 8.14 0 0.538 5.924 94.1 4.3996 4 307 21.0 394.33 16.30
## 3 0.05188 0 4.49 0 0.449 6.015 45.1 4.4272 3 247 18.5 395.99 12.86
## 4 0.16902 0 25.65 0 0.581 5.986 88.4 1.9929 2 188 19.1 385.02 14.81
## 5 0.59005 0 21.89 0 0.624 6.372 97.9 2.3274 4 437 21.2 385.76 11.12
## 6 0.32264 0 21.89 0 0.624 5.942 93.5 1.9669 4 437 21.2 378.25 16.90
## medv
## 1 33.4
## 2 15.6
## 3 22.5
## 4 21.4
## 5 23.0
## 6 17.4
#Installing and loading the "glmnet" package.
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.2.3
## Loading required package: Matrix
## Loaded glmnet 4.1-7
y <- train$medv
x <- data.matrix(train[, c('crim','zn','indus','chas','nox','rm','age','dis','rad','tax','ptratio','black','lstat')])
lreg <- glmnet(x = as.matrix(train[, -1]), y = train[, 1], alpha = 1)
summary(lreg)
## Length Class Mode
## a0 78 -none- numeric
## beta 1014 dgCMatrix S4
## df 78 -none- numeric
## dim 2 -none- numeric
## lambda 78 -none- numeric
## dev.ratio 78 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 4 -none- call
## nobs 1 -none- numeric
lreg
##
## Call: glmnet(x = as.matrix(train[, -1]), y = train[, 1], alpha = 1)
##
## Df %Dev Lambda
## 1 0 0.00 5.1900
## 2 1 6.45 4.7290
## 3 1 11.80 4.3090
## 4 1 16.24 3.9260
## 5 1 19.93 3.5770
## 6 1 22.99 3.2590
## 7 1 25.53 2.9700
## 8 1 27.64 2.7060
## 9 1 29.39 2.4660
## 10 1 30.85 2.2460
## 11 2 32.33 2.0470
## 12 2 33.70 1.8650
## 13 3 35.00 1.6990
## 14 3 36.12 1.5480
## 15 4 37.08 1.4110
## 16 4 37.90 1.2860
## 17 4 38.58 1.1710
## 18 4 39.15 1.0670
## 19 4 39.62 0.9724
## 20 4 40.01 0.8861
## 21 4 40.33 0.8073
## 22 4 40.60 0.7356
## 23 5 40.84 0.6703
## 24 5 41.05 0.6107
## 25 5 41.23 0.5565
## 26 5 41.37 0.5070
## 27 5 41.49 0.4620
## 28 5 41.59 0.4209
## 29 5 41.67 0.3836
## 30 5 41.74 0.3495
## 31 5 41.80 0.3184
## 32 6 41.99 0.2901
## 33 6 42.15 0.2644
## 34 7 42.30 0.2409
## 35 7 42.42 0.2195
## 36 7 42.53 0.2000
## 37 7 42.61 0.1822
## 38 8 42.71 0.1660
## 39 10 42.85 0.1513
## 40 10 43.05 0.1378
## 41 10 43.20 0.1256
## 42 10 43.34 0.1144
## 43 10 43.44 0.1043
## 44 11 43.54 0.0950
## 45 11 43.62 0.0866
## 46 11 43.69 0.0789
## 47 11 43.75 0.0719
## 48 11 43.80 0.0655
## 49 12 43.84 0.0597
## 50 12 43.87 0.0544
## 51 12 43.90 0.0495
## 52 12 43.92 0.0451
## 53 13 43.94 0.0411
## 54 13 43.97 0.0375
## 55 13 43.99 0.0341
## 56 13 44.01 0.0311
## 57 13 44.02 0.0284
## 58 13 44.04 0.0258
## 59 13 44.05 0.0235
## 60 13 44.05 0.0214
## 61 13 44.06 0.0195
## 62 13 44.07 0.0178
## 63 13 44.07 0.0162
## 64 13 44.08 0.0148
## 65 13 44.08 0.0135
## 66 13 44.08 0.0123
## 67 13 44.09 0.0112
## 68 13 44.09 0.0102
## 69 13 44.09 0.0093
## 70 13 44.09 0.0085
## 71 13 44.09 0.0077
## 72 13 44.09 0.0070
## 73 13 44.09 0.0064
## 74 13 44.10 0.0058
## 75 13 44.10 0.0053
## 76 13 44.10 0.0048
## 77 13 44.10 0.0044
## 78 13 44.10 0.0040
lambda_info <- cv.glmnet(x = as.matrix(train[, -1]), y = train[, 1], alpha = 1)
min_lambda <- lambda_info$lambda.min
min_lambda #Optimal Lambda
## [1] 0.02144368
?cv.glmnet
## starting httpd help server ... done
coef(lreg, s = min_lambda)
## 14 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 16.034400935
## zn 0.039325365
## indus -0.063250850
## chas -0.610974946
## nox -8.798949703
## rm 0.281810487
## age 0.003759749
## dis -0.865039126
## rad 0.533329534
## tax -0.001691356
## ptratio -0.221891091
## black -0.009493924
## lstat 0.082012866
## medv -0.179729165
y_predicted <- predict(lreg, s = min_lambda, newx = as.matrix(train[,-1]))
mean((train[,1] - y_predicted)^2)
## [1] 39.69084
newx <- data.matrix(test[, c('crim','zn','indus','chas','nox','rm','age','dis','rad','tax','ptratio','black','lstat')])
predict(lreg, s = lambda_info$lambda.min, newx = newx)
## s1
## [1,] -39.13261
## [2,] -106.70544
## [3,] -48.03265
## [4,] -87.83625
## [5,] -148.09864
## [6,] -146.15161
## [7,] -143.27858
## [8,] -139.52651
## [9,] -142.95132
## [10,] -92.64151
## [11,] -47.97136
## [12,] -74.83834
## [13,] -64.59155
## [14,] -94.99423
## [15,] -97.76891
## [16,] -93.39821
## [17,] -41.09366
## [18,] -59.93715
## [19,] -202.27575
## [20,] -199.96856
## [21,] -199.02268
## [22,] -192.10493
## [23,] -200.55272
## [24,] -217.79289
## [25,] -101.61431
## [26,] -96.71114
Age and Tax