Multivariable Linear Regression Model
# view libraries
library("datasets")
# linear model
ruta="./data/BASEDATOS_MODELO LINEAL.xlsx"
library(readxl)
hoja1=read_excel(ruta)
datos=data.frame(hoja1)
datos
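Before splitting the data, a quick structural check of the data frame helps confirm the column names and types used by the models further on. A minimal sketch using base-R inspection functions:
str(datos)       # column names and types
summary(datos)   # basic descriptive statistics, including any NAs
head(datos)      # first rows of the data frame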
Train and Test Split
nrow(datos)
[1] 474
particion=sample(nrow(datos),0.8*nrow(datos),replace=FALSE)   # 80% of the row indices for training, sampled without replacement
particion
[1] 5 213 203 450 126 269 96 416 67 466 30 141 251
[14] 372 4 418 97 70 433 184 147 91 221 207 443 330
[27] 168 440 162 149 302 88 311 12 337 363 334 274 139
[40] 278 338 155 103 262 144 467 194 329 232 100 169 26
[53] 226 74 402 63 461 428 426 206 198 101 8 264 229
[66] 316 190 389 58 444 448 289 1 308 22 225 176 245
[79] 7 131 15 243 301 451 24 18 233 281 77 252 242
[92] 368 3 404 282 98 366 384 159 286 294 275 45 231
[105] 138 171 257 449 253 89 95 165 345 254 432 57 261
[118] 307 14 250 199 263 36 87 429 346 377 300 33 438
[131] 348 299 179 145 291 320 386 23 127 153 277 341 321
[144] 405 102 109 128 352 369 105 349 40 64 166 351 142
[157] 47 258 284 31 204 456 247 439 112 130 260 76 459
[170] 80 455 143 42 395 465 419 222 452 434 304 412 446
[183] 362 454 333 61 72 471 29 134 315 267 2 325 122
[196] 220 92 280 413 406 116 425 463 306 310 298 323 293
[209] 431 457 174 458 296 445 331 85 407 10 154 82 313
[222] 246 151 11 68 132 25 69 38 383 414 156 164 121
[235] 287 424 71 223 230 188 472 37 158 336 54 240 470
[248] 32 312 208 382 119 305 401 182 52 410 78 273 460
[261] 249 94 309 56 209 290 327 27 185 241 21 285 409
[274] 283 178 161 6 328 43 272 192 65 191 173 35 239
[287] 292 146 201 152 197 20 297 180 84 110 218 417 415
[300] 265 117 437 396 473 49 399 378 93 120 167 367 339
[313] 160 175 427 181 385 411 357 422 183 99 469 353 163
[326] 442 397 210 16 340 238 219 236 354 79 170 303 474
[339] 447 288 48 83 392 393 436 235 224 195 259 371 193
[352] 468 271 314 356 53 172 140 28 380 111 374 400 441
[365] 279 430 359 19 248 464 73 34 148 376 266 118 360
[378] 388 375
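Note that sample() draws at random, so the indices above (and everything computed from them) change on every run. For a reproducible split, a seed can be fixed before sampling; a minimal sketch (123 is an arbitrary value):
set.seed(123)   # any fixed seed makes the partition reproducible
particion=sample(nrow(datos),0.8*nrow(datos),replace=FALSE)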
train=datos[particion,]    # training set (80% of the rows)
test=datos[-particion,]    # test set (the remaining rows)
test
# design of the labels: pairwise scatterplots of the variables in train
plot(train)
attach(train)   # expose the columns of train by name (salario_actual, experiencia, ...)
The following object is masked _by_ .GlobalEnv:
experiencia
plot(train$experiencia,train$salario_actual)
cor.test(salario_actual,experiencia)
Pearson's product-moment correlation
data: salario_actual and experiencia
t = 67.097, df = 377, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.9519715 0.9676859
sample estimates:
cor
0.960589
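The estimated correlation of about 0.96 indicates a strong linear relationship between salario_actual and experiencia. Squaring it, 0.960589^2 ≈ 0.9227, which is exactly the Multiple R-squared reported for the simple regression below, since in simple linear regression R-squared equals the squared Pearson correlation. A one-line check with base R:
cor(salario_actual,experiencia)^2   # ≈ 0.9227, matches the model's R-squared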
Create the model
modelo=lm(salario_actual~experiencia)
modelo
Call:
lm(formula = salario_actual ~ experiencia)
Coefficients:
(Intercept) experiencia
-2753.23 82.34
Statistical summary of the model
summary(modelo)
Call:
lm(formula = salario_actual ~ experiencia)
Residuals:
Min 1Q Median 3Q Max
-2232.2 -1012.3 175.4 1101.9 1903.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2753.234 152.911 -18.0 <2e-16 ***
experiencia 82.343 1.227 67.1 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1173 on 377 degrees of freedom
Multiple R-squared: 0.9227, Adjusted R-squared: 0.9225
F-statistic: 4502 on 1 and 377 DF, p-value: < 2.2e-16
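Reading the coefficients, the fitted line is salario_actual ≈ -2753.23 + 82.34 * experiencia, with R-squared ≈ 0.92 on the training data. As a quick sanity check, the same arithmetic can be done by hand for one case; plugging in experiencia = 100 gives roughly 5481, which matches prediction 43 in the test output further below (coef() is base R, the arithmetic is manual):
coef(modelo)                 # (Intercept) ≈ -2753.23, experiencia ≈ 82.34
-2753.234 + 82.343*100       # ≈ 5481, manual prediction for experiencia = 100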
experiencia=test$experiencia
experiencia
[1] 101 71 183 155 58 82 99 145 178 193 195 176 117 104
[15] 149 74 70 88 112 183 83 177 158 102 99 32 69 105
[29] 186 86 92 148 79 93 40 127 159 129 43 71 65 61
[43] 100 177 59 80 57 145 179 131 162 159 152 97 95 152
[57] 130 60 55 117 102 76 30 68 154 64 134 68 114 35
[71] 49 118 84 194 45 113 188 50 44 69 182 125 158 119
[85] 91 197 128 164 92 198 173 184 51 67 179
Send the test-set experiencia data to the model to predict salario_actual
(predecir=predict(modelo,list(experiencia=test$experiencia)))
1 2 3 4 5
5563.4038 3093.1154 12315.5256 10009.9230 2022.6571
6 7 8 9 10
3998.8878 5398.7179 9186.4935 11903.8108 13138.9550
11 12 13 14 15
13303.6409 11739.1249 6880.8910 5810.4327 9515.8653
16 17 18 19 20
3340.1442 3010.7724 4492.9455 6469.1763 12315.5256
21 22 23 24 25
4081.2308 11821.4679 10256.9519 5645.7468 5398.7179
26 27 28 29 30
-118.2596 2928.4295 5892.7756 12562.5544 4328.2596
31 32 33 34 35
4822.3173 9433.5224 3751.8590 4904.6602 540.4840
36 37 38 39 40
7704.3205 10339.2948 7869.0064 787.5128 3093.1154
41 42 43 44 45
2599.0577 2269.6859 5481.0609 11821.4679 2105.0000
46 47 48 49 50
3834.2019 1940.3141 9186.4935 11986.1538 8033.6923
51 52 53 54 55
10586.3236 10339.2948 9762.8942 5234.0320 5069.3461
56 57 58 59 60
9762.8942 7951.3493 2187.3430 1775.6282 6880.8910
61 62 63 64 65
5645.7468 3504.8301 -282.9455 2846.0865 9927.5801
66 67 68 69 70
2516.7148 8280.7211 2846.0865 6633.8621 128.7693
71 72 73 74 75
1281.5705 6963.2339 4163.5737 13221.2980 952.1987
76 77 78 79 80
6551.5192 12727.2403 1363.9135 869.8558 2928.4295
81 82 83 84 85
12233.1826 7539.6346 10256.9519 7045.5769 4739.9743
86 87 88 89 90
13468.3268 7786.6634 10751.0095 4822.3173 13550.6698
91 92 93 94 95
11492.0961 12397.8685 1446.2564 2763.7436 11986.1538
View the actual vs. predicted values side by side
(matriz=data.frame(test$salario_actual,predecir))
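One way to quantify how well the simple model generalizes is to compute error metrics on the held-out test set, directly from the two columns collected in matriz; a minimal sketch of RMSE and MAE:
sqrt(mean((test$salario_actual-predecir)^2))   # RMSE on the test set
mean(abs(test$salario_actual-predecir))        # MAE on the test set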
# plot: test points and prediction line
plot(test$experiencia,test$salario_actual,pch=20)
lines(test$experiencia,predecir,col=2)
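Because the test rows are not ordered by experiencia, lines() connects the predictions in row order and draws a zig-zag over the points. A sketch of two common fixes, sorting by experiencia first or letting abline() draw the fitted line (orden is an illustrative name):
orden=order(test$experiencia)
plot(test$experiencia,test$salario_actual,pch=20)
lines(test$experiencia[orden],predecir[orden],col=2)   # prediction line in increasing order of experiencia
abline(modelo,col=4)                                   # equivalent: straight line from the fitted model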
experiencia=c(100:150)
experiencia
[1] 100 101 102 103 104 105 106 107 108 109 110 111 112 113
[15] 114 115 116 117 118 119 120 121 122 123 124 125 126 127
[29] 128 129 130 131 132 133 134 135 136 137 138 139 140 141
[43] 142 143 144 145 146 147 148 149 150
# send new data
# NOTE: without a newdata= argument, predict() ignores experiencia=c(100:150) and
# returns the 379 fitted values of the training set shown below; corrected call follows the output.
predict(modelo,experiencia=c(100:150))
1 2 3 4 5
9411.6713 6675.8008 2842.4365 1791.5732 1846.4654
6 7 8 9 10
1288.0905 1740.8119 5844.3631 5723.5844 10734.3423
11 12 13 14 15
13798.1677 2584.4989 1385.4824 12619.5191 2381.4535
16 17 18 19 20
3603.8569 5977.3912 4837.2546 11944.4963 6043.2777
21 22 23 24 25
6675.8008 3701.2487 13918.9464 4009.9477 10313.1263
26 27 28 29 30
2322.5736 1010.8969 6550.8913 2284.0616 4486.0559
31 32 33 34 35
11413.4959 6347.8459 5161.0787 2229.1694 11838.8427
36 37 38 39 40
1389.6132 5414.8855 4568.3226 3415.9366 10035.9327
41 42 43 44 45
7659.5226 4888.0159 2537.8684 1823.0786 7382.3290
46 47 48 49 50
3244.3965 6301.2153 10512.0409 1897.2268 1015.0277
51 52 53 54 55
2455.6017 6620.9086 14648.8614 3064.7379 11362.7345
56 57 58 59 60
3724.6356 1721.5559 2252.5563 9887.7795 2654.5163
61 62 63 64 65
6421.9940 4989.5386 862.7437 2311.4362 7128.5222
66 67 68 69 70
6910.3516 5040.3000 6605.7835 7706.1531 13064.1219
71 72 73 74 75
3775.3969 2146.9027 12319.0818 8560.9776 3420.0674
76 77 78 79 80
1487.0051 9462.4326 2893.1979 10516.1717 3857.6637
81 82 83 84 85
8061.4826 15393.9015 4431.1637 12896.7127 3638.2380
86 87 88 89 90
15023.4468 7913.3293 2639.3911 7405.7158 2252.5563
91 92 93 94 95
14153.4972 6796.5795 2354.0790 10613.5636 13743.2755
96 97 98 99 100
7335.6984 14121.9918 7077.7608 9286.7617 4298.1356
101 102 103 104 105
8385.3067 4162.2318 12947.4740 7433.0903 589.6809
106 107 108 109 110
10632.8196 2209.9134 1397.7317 2127.6467 9013.6990
111 112 113 114 115
9938.5408 13899.6904 2994.7206 12740.2978 4228.1183
116 117 118 119 120
3009.8458 9286.7617 4583.4478 6925.4768 11464.2573
121 122 123 124 125
1097.2944 4942.9081 5368.2549 5211.8400 1416.9877
126 127 128 129 130
1674.9254 2806.8003 5742.8404 2482.9762 8233.0226
131 132 133 134 135
12994.1045 3587.4767 4056.5782 3330.7941 4348.8970
136 137 138 139 140
5617.9309 964.2664 4462.6691 2912.4539 6707.3062
141 142 143 144 145
4310.3850 7538.7439 3720.5047 6722.4313 4365.2772
146 147 148 149 150
3650.4874 1791.5732 13794.0369 11741.4509 1163.1810
151 152 153 154 155
12299.8258 12471.3658 3092.1125 8237.1535 4107.3396
156 157 158 159 160
5840.2323 3162.1298 11515.0186 5820.9763 14274.2759
161 162 163 164 165
7862.5680 1537.7664 3498.2033 3400.8115 10835.8650
166 167 168 169 170
1264.7037 13946.3209 7487.9825 6773.1927 4411.9077
171 172 173 174 175
6199.6926 7811.8066 2010.9989 3654.6182 12264.1896
176 177 178 179 180
1233.1983 6620.9086 4310.3850 11316.1040 3013.9766
181 182 183 184 185
1690.0505 7284.9371 3299.2887 1588.5278 5040.3000
186 187 188 189 190
12892.5818 11991.1268 11519.1494 3232.1472 3420.0674
191 192 193 194 195
10262.3649 3314.4139 1218.0731 15190.8561 6043.2777
196 197 198 199 200
12092.6495 15070.0774 5687.9482 10308.9955 12365.7123
201 202 203 204 205
11686.5587 1725.6867 11483.5132 11686.5587 7534.6130
206 207 208 209 210
8436.0681 13298.6727 8662.5003 6195.5618 12046.0190
211 212 213 214 215
3483.0782 6402.7381 2049.5108 6555.0221 6449.3686
216 217 218 219 220
4790.6240 7507.2385 1338.8518 7405.7158 1541.8973
221 222 223 224 225
7483.8517 11210.4505 4532.6864 5941.7550 2502.2322
226 227 228 229 230
10160.8422 6574.2781 11991.1268 5637.1869 4602.7038
231 232 233 234 235
13997.0823 5742.8404 4837.2546 2013.8746 7557.9999
236 237 238 239 240
5719.4536 2607.8857 10687.7118 5945.8859 5469.7777
241 242 243 244 245
1624.1640 4130.7264 1975.3626 11565.7800 11362.7345
246 247 248 249 250
10465.4104 2603.7549 4486.0559 3572.3515 7964.0907
251 252 253 254 255
11585.0360 10262.3649 11620.6722 14820.4014 2482.9762
256 257 258 259 260
1065.7891 6929.6076 2076.8854 3299.2887 7858.4372
261 262 263 264 265
9189.3699 3072.8565 10617.6944 8764.0230 1588.5278
266 267 268 269 270
4177.3569 5996.6472 5414.8855 13216.4059 7179.2835
271 272 273 274 275
1315.4650 12010.3828 3618.9820 12693.6672 7932.5853
276 277 278 279 280
8537.5908 13322.0595 2045.3800 3923.5502 11265.3427
281 282 283 284 285
5789.4709 2076.8854 636.3114 5641.3177 6726.5622
286 287 288 289 290
14047.8436 9731.3645 15721.8564 7026.9995 10785.1036
291 292 293 294 295
2096.1413 3470.8288 4107.3396 13423.5822 6703.1754
296 297 298 299 300
13747.4063 9985.1713 8283.7840 8759.8922 2709.4085
301 302 303 304 305
2119.5282 6874.7154 3673.8742 7026.9995 4993.6695
306 307 308 309 310
7909.1985 1213.9423 12841.8205 14223.5145 3908.4250
311 312 313 314 315
2299.1868 2537.8684 10613.5636 8709.1308 1740.8119
316 317 318 319 320
2400.7095 12213.4282 5617.9309 15343.1401 13946.3209
321 322 323 324 325
3041.3511 7507.2385 11316.1040 1366.2264 4809.8800
326 327 328 329 330
1998.7495 6504.2608 5977.3912 8556.8468 11893.7349
331 332 333 334 335
3548.9647 7179.2835 3826.1583 11620.6722 3248.5274
336 337 338 339 340
2963.2152 1928.7321 9356.7791 13641.7528 2994.7206
341 342 343 344 345
3802.7715 894.2490 5590.5564 13571.7354 4126.5956
346 347 348 349 350
11569.9108 913.5050 5789.4709 13419.4514 3244.3965
351 352 353 354 355
4025.0729 3838.4077 1873.8399 1905.3453 7483.8517
356 357 358 359 360
7604.6304 14477.3213 5875.8685 15666.9642 7913.3293
361 362 363 364 365
13470.2127 3267.7834 9513.1940 7433.0903 2576.3804
366 367 368 369 370
11261.2118 10839.9958 2299.1868 8713.2617 1334.7210
371 372 373 374 375
4684.9705 11639.9281 3908.4250 15717.7256 13626.6276
376 377 378 379
7553.8690 13317.9287 9134.4777 15398.0323
predict
function (object, ...)
UseMethod("predict")
<bytecode: 0x000001fa3d627de0>
<environment: namespace:stats>
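As flagged above, predict(modelo,experiencia=c(100:150)) does not do what was intended: predict.lm() only reads new values from its newdata argument, so the extra experiencia= argument is silently ignored and the function returns the 379 fitted values of the training data, which is exactly the output printed above. The corrected call wraps the new values in a data frame (or list) whose column is named like the predictor:
predict(modelo,newdata=data.frame(experiencia=100:150))   # 51 predictions, one per new value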
Multivariable Model, Version 1
# ***** multiple linear model *****
modelo=lm(train$salario_actual~train$experiencia+train$antiguedad+train$sexo)
modelo
Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad +
train$sexo)
Coefficients:
(Intercept) train$experiencia train$antiguedad
-1214.484 50.761 273.063
train$sexo
4.131
summary(modelo)
Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad +
train$sexo)
Residuals:
Min 1Q Median 3Q Max
-2257.0 -584.5 13.1 652.6 1725.5
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1214.484 188.121 -6.456 3.33e-10 ***
train$experiencia 50.761 1.921 26.420 < 2e-16 ***
train$antiguedad 273.063 14.728 18.541 < 2e-16 ***
train$sexo 4.131 87.547 0.047 0.962
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 849.3 on 375 degrees of freedom
Multiple R-squared: 0.9597, Adjusted R-squared: 0.9594
F-statistic: 2976 on 3 and 375 DF, p-value: < 2.2e-16
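Reading the coefficients, the fitted model is salario_actual ≈ -1214.48 + 50.76*experiencia + 273.06*antiguedad + 4.13*sexo. The sexo term contributes only about 4 units and its p-value is 0.962, so it adds essentially nothing once experiencia and antiguedad are in the model; this is what motivates dropping it in Version 2. A confidence-interval check with base R:
confint(modelo)   # the interval for train$sexo spans zero, confirming it is not significant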
Multivariable Model, Version 2
Refit using only the variables we did not discard (sexo is dropped, since its coefficient was not significant, p = 0.962).
modelo2=lm(train$salario_actual~train$experiencia+train$antiguedad)
modelo2
Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad)
Coefficients:
(Intercept) train$experiencia train$antiguedad
-1208.48 50.76 273.06
summary(modelo2)
Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad)
Residuals:
Min 1Q Median 3Q Max
-2258.80 -584.31 11.21 652.82 1723.62
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1208.482 138.400 -8.732 <2e-16 ***
train$experiencia 50.762 1.919 26.456 <2e-16 ***
train$antiguedad 273.057 14.708 18.565 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 848.2 on 376 degrees of freedom
Multiple R-squared: 0.9597, Adjusted R-squared: 0.9595
F-statistic: 4475 on 2 and 376 DF, p-value: < 2.2e-16
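Because modelo2 was specified with train$ prefixes inside the formula, predict() cannot be pointed at new data such as the test set. A more idiomatic sketch, assuming the same column names, refits the model with the data argument and scores the held-out rows (modelo2b and pred2 are illustrative names):
modelo2b=lm(salario_actual~experiencia+antiguedad,data=train)
pred2=predict(modelo2b,newdata=test)
sqrt(mean((test$salario_actual-pred2)^2))   # test RMSE of the multivariable model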