Modelo Regresión Lineal Multivariable

# Load the required libraries
Warning message:
R graphics engine version 15 is not supported by this version of RStudio. The Plots tab will be disabled until a newer version of RStudio is installed. 
# Attach the packages used by this notebook; read_excel() comes from readxl.
library("datasets")
library(readxl)

# Linear model: location of the Excel workbook that holds the data.
ruta <- "./data/BASEDATOS_MODELO LINEAL.xlsx"

# Read the workbook and convert the result into a plain data.frame.
hoja1 <- read_excel(ruta)
datos <- data.frame(hoja1)

# Display the loaded data.
datos

Train (entrenar) y Test

# Number of rows (observations) in the full data set.
nrow(datos)
[1] 474
# Row indices of the training set: a random 80% of the rows of `datos`,
# drawn without replacement.
# The RNG seed is fixed so the train/test split, and every result derived
# from it, is the same on each run of the notebook.
set.seed(123)
# 0.8 * nrow() is generally fractional; floor() makes the sample size an
# explicit whole number instead of relying on implicit truncation.
n_train <- floor(0.8 * nrow(datos))
particion <- sample(nrow(datos), size = n_train, replace = FALSE)
particion
  [1]   5 213 203 450 126 269  96 416  67 466  30 141 251
 [14] 372   4 418  97  70 433 184 147  91 221 207 443 330
 [27] 168 440 162 149 302  88 311  12 337 363 334 274 139
 [40] 278 338 155 103 262 144 467 194 329 232 100 169  26
 [53] 226  74 402  63 461 428 426 206 198 101   8 264 229
 [66] 316 190 389  58 444 448 289   1 308  22 225 176 245
 [79]   7 131  15 243 301 451  24  18 233 281  77 252 242
 [92] 368   3 404 282  98 366 384 159 286 294 275  45 231
[105] 138 171 257 449 253  89  95 165 345 254 432  57 261
[118] 307  14 250 199 263  36  87 429 346 377 300  33 438
[131] 348 299 179 145 291 320 386  23 127 153 277 341 321
[144] 405 102 109 128 352 369 105 349  40  64 166 351 142
[157]  47 258 284  31 204 456 247 439 112 130 260  76 459
[170]  80 455 143  42 395 465 419 222 452 434 304 412 446
[183] 362 454 333  61  72 471  29 134 315 267   2 325 122
[196] 220  92 280 413 406 116 425 463 306 310 298 323 293
[209] 431 457 174 458 296 445 331  85 407  10 154  82 313
[222] 246 151  11  68 132  25  69  38 383 414 156 164 121
[235] 287 424  71 223 230 188 472  37 158 336  54 240 470
[248]  32 312 208 382 119 305 401 182  52 410  78 273 460
[261] 249  94 309  56 209 290 327  27 185 241  21 285 409
[274] 283 178 161   6 328  43 272 192  65 191 173  35 239
[287] 292 146 201 152 197  20 297 180  84 110 218 417 415
[300] 265 117 437 396 473  49 399 378  93 120 167 367 339
[313] 160 175 427 181 385 411 357 422 183  99 469 353 163
[326] 442 397 210  16 340 238 219 236 354  79 170 303 474
[339] 447 288  48  83 392 393 436 235 224 195 259 371 193
[352] 468 271 314 356  53 172 140  28 380 111 374 400 441
[365] 279 430 359  19 248 464  73  34 148 376 266 118 360
[378] 388 375
# Split the rows: the sampled indices form the training set and every
# remaining row goes to the test set.
train <- datos[particion, ]
test <- datos[-particion, ]

# Display the held-out test rows.
test

# Plot the training data frame (original note: "design of the labels").
plot(train)

# Put the columns of `train` on the search path so they can be referenced by
# bare name.
# NOTE(review): a global variable with the same name takes precedence over an
# attached column (see the "masked _by_ .GlobalEnv" message reported for
# `experiencia`); prefer `train$column` or a `data =` argument.
attach(train)
The following object is masked _by_ .GlobalEnv:

    experiencia
# Scatter plot of current salary (y) against experience (x), training set.
plot(train$experiencia,train$salario_actual)

# Pearson correlation test between current salary and experience on the
# training set.
# The expression is evaluated inside `train`, so both names resolve to the
# training columns. A bare `experiencia` at top level can instead resolve to
# a global variable of the same name that masks the attached column.
with(train, cor.test(salario_actual, experiencia))

    Pearson's product-moment correlation

data:  salario_actual and experiencia
t = 67.097, df = 377, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9519715 0.9676859
sample estimates:
     cor 
0.960589 

Crear modelo

# Simple linear regression: current salary explained by experience.
# `data = train` makes lm() take both variables from the training set.
# Without it the formula is resolved through the search path, where a global
# `experiencia` masks the attached training column and can make the fit fail
# or use the wrong values.
modelo <- lm(salario_actual ~ experiencia, data = train)
modelo

Call:
lm(formula = salario_actual ~ experiencia)

Coefficients:
(Intercept)  experiencia  
   -2753.23        82.34  

Resumen estadístico del modelo

# Statistical summary of the fitted model: residuals, coefficient table and
# goodness-of-fit figures.
summary(modelo)

Call:
lm(formula = salario_actual ~ experiencia)

Residuals:
    Min      1Q  Median      3Q     Max 
-2232.2 -1012.3   175.4  1101.9  1903.0 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2753.234    152.911   -18.0   <2e-16 ***
experiencia    82.343      1.227    67.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1173 on 377 degrees of freedom
Multiple R-squared:  0.9227,    Adjusted R-squared:  0.9225 
F-statistic:  4502 on 1 and 377 DF,  p-value: < 2.2e-16
# Experience values of the test set, displayed for inspection.
# They are stored under their own name so that no global `experiencia` is
# created; such a global would mask the training column of the same name
# made visible by attach(train).
experiencia_test <- test$experiencia
experiencia_test
 [1] 101  71 183 155  58  82  99 145 178 193 195 176 117 104
[15] 149  74  70  88 112 183  83 177 158 102  99  32  69 105
[29] 186  86  92 148  79  93  40 127 159 129  43  71  65  61
[43] 100 177  59  80  57 145 179 131 162 159 152  97  95 152
[57] 130  60  55 117 102  76  30  68 154  64 134  68 114  35
[71]  49 118  84 194  45 113 188  50  44  69 182 125 158 119
[85]  91 197 128 164  92 198 173 184  51  67 179

Enviar los datos de experiencia al modelo para predecir el salario actual

# Predict current salary for the test set by handing its experience values to
# the fitted model as new data, then display the predictions.
predecir <- predict(modelo, newdata = list(experiencia = test$experiencia))
predecir
         1          2          3          4          5 
 5563.4038  3093.1154 12315.5256 10009.9230  2022.6571 
         6          7          8          9         10 
 3998.8878  5398.7179  9186.4935 11903.8108 13138.9550 
        11         12         13         14         15 
13303.6409 11739.1249  6880.8910  5810.4327  9515.8653 
        16         17         18         19         20 
 3340.1442  3010.7724  4492.9455  6469.1763 12315.5256 
        21         22         23         24         25 
 4081.2308 11821.4679 10256.9519  5645.7468  5398.7179 
        26         27         28         29         30 
 -118.2596  2928.4295  5892.7756 12562.5544  4328.2596 
        31         32         33         34         35 
 4822.3173  9433.5224  3751.8590  4904.6602   540.4840 
        36         37         38         39         40 
 7704.3205 10339.2948  7869.0064   787.5128  3093.1154 
        41         42         43         44         45 
 2599.0577  2269.6859  5481.0609 11821.4679  2105.0000 
        46         47         48         49         50 
 3834.2019  1940.3141  9186.4935 11986.1538  8033.6923 
        51         52         53         54         55 
10586.3236 10339.2948  9762.8942  5234.0320  5069.3461 
        56         57         58         59         60 
 9762.8942  7951.3493  2187.3430  1775.6282  6880.8910 
        61         62         63         64         65 
 5645.7468  3504.8301  -282.9455  2846.0865  9927.5801 
        66         67         68         69         70 
 2516.7148  8280.7211  2846.0865  6633.8621   128.7693 
        71         72         73         74         75 
 1281.5705  6963.2339  4163.5737 13221.2980   952.1987 
        76         77         78         79         80 
 6551.5192 12727.2403  1363.9135   869.8558  2928.4295 
        81         82         83         84         85 
12233.1826  7539.6346 10256.9519  7045.5769  4739.9743 
        86         87         88         89         90 
13468.3268  7786.6634 10751.0095  4822.3173 13550.6698 
        91         92         93         94         95 
11492.0961 12397.8685  1446.2564  2763.7436 11986.1538 

Ver matriz: valor real vs. valor predicho

# Side-by-side table of the actual test salaries and the model's predictions,
# stored in `matriz` and displayed.
matriz <- data.frame(test$salario_actual, predecir)
matriz
# Plot the test data together with the model's predictions
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
# Test observations drawn with point symbol 20: experience (x) against
# current salary (y).
plot(test$experiencia,test$salario_actual,pch=20)
# Overlay the predictions for the same experience values as a line drawn in
# colour 2 of the active palette.
lines(test$experiencia,predecir,col=2)

# New experience values (100 to 150), displayed for inspection.
# They are stored under their own name so that no global `experiencia` is
# created; such a global would mask the training column of the same name
# made visible by attach(train).
experiencia_nueva <- 100:150
experiencia_nueva
 [1] 100 101 102 103 104 105 106 107 108 109 110 111 112 113
[15] 114 115 116 117 118 119 120 121 122 123 124 125 126 127
[29] 128 129 130 131 132 133 134 135 136 137 138 139 140 141
[43] 142 143 144 145 146 147 148 149 150
# Send new data to the model: predict current salary for experience values
# 100 to 150.
# The values must be passed through `newdata`, in a data frame whose column
# carries the name of the model's predictor. An argument named `experiencia`
# is not one predict() recognises: it is ignored and the fitted values of the
# training data are returned instead of predictions for the new values.
predict(modelo, newdata = data.frame(experiencia = 100:150))
         1          2          3          4          5 
 9411.6713  6675.8008  2842.4365  1791.5732  1846.4654 
         6          7          8          9         10 
 1288.0905  1740.8119  5844.3631  5723.5844 10734.3423 
        11         12         13         14         15 
13798.1677  2584.4989  1385.4824 12619.5191  2381.4535 
        16         17         18         19         20 
 3603.8569  5977.3912  4837.2546 11944.4963  6043.2777 
        21         22         23         24         25 
 6675.8008  3701.2487 13918.9464  4009.9477 10313.1263 
        26         27         28         29         30 
 2322.5736  1010.8969  6550.8913  2284.0616  4486.0559 
        31         32         33         34         35 
11413.4959  6347.8459  5161.0787  2229.1694 11838.8427 
        36         37         38         39         40 
 1389.6132  5414.8855  4568.3226  3415.9366 10035.9327 
        41         42         43         44         45 
 7659.5226  4888.0159  2537.8684  1823.0786  7382.3290 
        46         47         48         49         50 
 3244.3965  6301.2153 10512.0409  1897.2268  1015.0277 
        51         52         53         54         55 
 2455.6017  6620.9086 14648.8614  3064.7379 11362.7345 
        56         57         58         59         60 
 3724.6356  1721.5559  2252.5563  9887.7795  2654.5163 
        61         62         63         64         65 
 6421.9940  4989.5386   862.7437  2311.4362  7128.5222 
        66         67         68         69         70 
 6910.3516  5040.3000  6605.7835  7706.1531 13064.1219 
        71         72         73         74         75 
 3775.3969  2146.9027 12319.0818  8560.9776  3420.0674 
        76         77         78         79         80 
 1487.0051  9462.4326  2893.1979 10516.1717  3857.6637 
        81         82         83         84         85 
 8061.4826 15393.9015  4431.1637 12896.7127  3638.2380 
        86         87         88         89         90 
15023.4468  7913.3293  2639.3911  7405.7158  2252.5563 
        91         92         93         94         95 
14153.4972  6796.5795  2354.0790 10613.5636 13743.2755 
        96         97         98         99        100 
 7335.6984 14121.9918  7077.7608  9286.7617  4298.1356 
       101        102        103        104        105 
 8385.3067  4162.2318 12947.4740  7433.0903   589.6809 
       106        107        108        109        110 
10632.8196  2209.9134  1397.7317  2127.6467  9013.6990 
       111        112        113        114        115 
 9938.5408 13899.6904  2994.7206 12740.2978  4228.1183 
       116        117        118        119        120 
 3009.8458  9286.7617  4583.4478  6925.4768 11464.2573 
       121        122        123        124        125 
 1097.2944  4942.9081  5368.2549  5211.8400  1416.9877 
       126        127        128        129        130 
 1674.9254  2806.8003  5742.8404  2482.9762  8233.0226 
       131        132        133        134        135 
12994.1045  3587.4767  4056.5782  3330.7941  4348.8970 
       136        137        138        139        140 
 5617.9309   964.2664  4462.6691  2912.4539  6707.3062 
       141        142        143        144        145 
 4310.3850  7538.7439  3720.5047  6722.4313  4365.2772 
       146        147        148        149        150 
 3650.4874  1791.5732 13794.0369 11741.4509  1163.1810 
       151        152        153        154        155 
12299.8258 12471.3658  3092.1125  8237.1535  4107.3396 
       156        157        158        159        160 
 5840.2323  3162.1298 11515.0186  5820.9763 14274.2759 
       161        162        163        164        165 
 7862.5680  1537.7664  3498.2033  3400.8115 10835.8650 
       166        167        168        169        170 
 1264.7037 13946.3209  7487.9825  6773.1927  4411.9077 
       171        172        173        174        175 
 6199.6926  7811.8066  2010.9989  3654.6182 12264.1896 
       176        177        178        179        180 
 1233.1983  6620.9086  4310.3850 11316.1040  3013.9766 
       181        182        183        184        185 
 1690.0505  7284.9371  3299.2887  1588.5278  5040.3000 
       186        187        188        189        190 
12892.5818 11991.1268 11519.1494  3232.1472  3420.0674 
       191        192        193        194        195 
10262.3649  3314.4139  1218.0731 15190.8561  6043.2777 
       196        197        198        199        200 
12092.6495 15070.0774  5687.9482 10308.9955 12365.7123 
       201        202        203        204        205 
11686.5587  1725.6867 11483.5132 11686.5587  7534.6130 
       206        207        208        209        210 
 8436.0681 13298.6727  8662.5003  6195.5618 12046.0190 
       211        212        213        214        215 
 3483.0782  6402.7381  2049.5108  6555.0221  6449.3686 
       216        217        218        219        220 
 4790.6240  7507.2385  1338.8518  7405.7158  1541.8973 
       221        222        223        224        225 
 7483.8517 11210.4505  4532.6864  5941.7550  2502.2322 
       226        227        228        229        230 
10160.8422  6574.2781 11991.1268  5637.1869  4602.7038 
       231        232        233        234        235 
13997.0823  5742.8404  4837.2546  2013.8746  7557.9999 
       236        237        238        239        240 
 5719.4536  2607.8857 10687.7118  5945.8859  5469.7777 
       241        242        243        244        245 
 1624.1640  4130.7264  1975.3626 11565.7800 11362.7345 
       246        247        248        249        250 
10465.4104  2603.7549  4486.0559  3572.3515  7964.0907 
       251        252        253        254        255 
11585.0360 10262.3649 11620.6722 14820.4014  2482.9762 
       256        257        258        259        260 
 1065.7891  6929.6076  2076.8854  3299.2887  7858.4372 
       261        262        263        264        265 
 9189.3699  3072.8565 10617.6944  8764.0230  1588.5278 
       266        267        268        269        270 
 4177.3569  5996.6472  5414.8855 13216.4059  7179.2835 
       271        272        273        274        275 
 1315.4650 12010.3828  3618.9820 12693.6672  7932.5853 
       276        277        278        279        280 
 8537.5908 13322.0595  2045.3800  3923.5502 11265.3427 
       281        282        283        284        285 
 5789.4709  2076.8854   636.3114  5641.3177  6726.5622 
       286        287        288        289        290 
14047.8436  9731.3645 15721.8564  7026.9995 10785.1036 
       291        292        293        294        295 
 2096.1413  3470.8288  4107.3396 13423.5822  6703.1754 
       296        297        298        299        300 
13747.4063  9985.1713  8283.7840  8759.8922  2709.4085 
       301        302        303        304        305 
 2119.5282  6874.7154  3673.8742  7026.9995  4993.6695 
       306        307        308        309        310 
 7909.1985  1213.9423 12841.8205 14223.5145  3908.4250 
       311        312        313        314        315 
 2299.1868  2537.8684 10613.5636  8709.1308  1740.8119 
       316        317        318        319        320 
 2400.7095 12213.4282  5617.9309 15343.1401 13946.3209 
       321        322        323        324        325 
 3041.3511  7507.2385 11316.1040  1366.2264  4809.8800 
       326        327        328        329        330 
 1998.7495  6504.2608  5977.3912  8556.8468 11893.7349 
       331        332        333        334        335 
 3548.9647  7179.2835  3826.1583 11620.6722  3248.5274 
       336        337        338        339        340 
 2963.2152  1928.7321  9356.7791 13641.7528  2994.7206 
       341        342        343        344        345 
 3802.7715   894.2490  5590.5564 13571.7354  4126.5956 
       346        347        348        349        350 
11569.9108   913.5050  5789.4709 13419.4514  3244.3965 
       351        352        353        354        355 
 4025.0729  3838.4077  1873.8399  1905.3453  7483.8517 
       356        357        358        359        360 
 7604.6304 14477.3213  5875.8685 15666.9642  7913.3293 
       361        362        363        364        365 
13470.2127  3267.7834  9513.1940  7433.0903  2576.3804 
       366        367        368        369        370 
11261.2118 10839.9958  2299.1868  8713.2617  1334.7210 
       371        372        373        374        375 
 4684.9705 11639.9281  3908.4250 15717.7256 13626.6276 
       376        377        378        379 
 7553.8690 13317.9287  9134.4777 15398.0323 
# NOTE(review): a bare `predict` only prints the definition of the generic
# function; it performs no prediction.
predict
function (object, ...) 
UseMethod("predict")
<bytecode: 0x000001fa3d627de0>
<environment: namespace:stats>

Modelo Multivariado 1

# ***** multiple linear model *****
# Current salary explained by experience, seniority and sex.
# The formula uses bare column names together with `data = train` rather than
# `train$...` terms. Terms written as `train$column` always refer to the
# training vectors, so predict(..., newdata = ) could not apply the model to
# other data such as the test set.
modelo <- lm(salario_actual ~ experiencia + antiguedad + sexo, data = train)
modelo

Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad + 
    train$sexo)

Coefficients:
      (Intercept)  train$experiencia   train$antiguedad  
        -1214.484             50.761            273.063  
       train$sexo  
            4.131  
# Statistical summary of the multiple regression: residuals, coefficient
# table with significance levels and goodness-of-fit figures.
summary(modelo)

Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad + 
    train$sexo)

Residuals:
    Min      1Q  Median      3Q     Max 
-2257.0  -584.5    13.1   652.6  1725.5 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       -1214.484    188.121  -6.456 3.33e-10 ***
train$experiencia    50.761      1.921  26.420  < 2e-16 ***
train$antiguedad    273.063     14.728  18.541  < 2e-16 ***
train$sexo            4.131     87.547   0.047    0.962    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 849.3 on 375 degrees of freedom
Multiple R-squared:  0.9597,    Adjusted R-squared:  0.9594 
F-statistic:  2976 on 3 and 375 DF,  p-value: < 2.2e-16

Modelo Multivariado Version 2

Enviar todas las etiquetas de las variables que no descartamos.

# Second multivariate model: current salary explained by experience and
# seniority only.
# The formula uses bare column names together with `data = train` rather than
# `train$...` terms, so the fitted model can be applied to other data through
# predict(..., newdata = ).
modelo2 <- lm(salario_actual ~ experiencia + antiguedad, data = train)
modelo2

Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad)

Coefficients:
      (Intercept)  train$experiencia   train$antiguedad  
         -1208.48              50.76             273.06  
# Statistical summary of the second multivariate model: residuals,
# coefficient table and goodness-of-fit figures.
summary(modelo2)

Call:
lm(formula = train$salario_actual ~ train$experiencia + train$antiguedad)

Residuals:
     Min       1Q   Median       3Q      Max 
-2258.80  -584.31    11.21   652.82  1723.62 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)       -1208.482    138.400  -8.732   <2e-16 ***
train$experiencia    50.762      1.919  26.456   <2e-16 ***
train$antiguedad    273.057     14.708  18.565   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 848.2 on 376 degrees of freedom
Multiple R-squared:  0.9597,    Adjusted R-squared:  0.9595 
F-statistic:  4475 on 2 and 376 DF,  p-value: < 2.2e-16
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2sgZGUgU2VtYW5hMDU6IE1vZGVsbyBMaW5lYWwgTXVsdGl2YXJpYWJsZSwgcG9yIENhbmNoby1Sb2Ryw61ndWV6LCBFcm5lc3RvIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KTW9kZWxvIFJlZ3Jlc2nDs24gTGluZWFsIE11bHRpdmFyaWFibGUNCg0KYGBge3J9DQojdmVyIGxpYnJlcmlhcw0KbGlicmFyeSgiZGF0YXNldHMiKQ0KDQojICAgIG1vZGVsbyBsaW5lYWwNCg0KcnV0YT0iLi9kYXRhL0JBU0VEQVRPU19NT0RFTE8gTElORUFMLnhsc3giDQoNCmxpYnJhcnkocmVhZHhsKQ0KaG9qYTE9cmVhZF9leGNlbChydXRhKQ0KDQpkYXRvcz1kYXRhLmZyYW1lKGhvamExKQ0KZGF0b3MNCmBgYA0KDQpUcmFpbiAoZW50cmVuYXIpIHkgVGVzdA0KYGBge3J9DQpucm93KGRhdG9zKQ0KYGBgDQoNCmBgYHtyfQ0KcGFydGljaW9uPXNhbXBsZShucm93KGRhdG9zKSwwLjgqbnJvdyhkYXRvcyksDQogICAgICAgICAgICAgICAgIHJlcGxhY2U9RkFMU0UpDQpwYXJ0aWNpb24NCmBgYA0KDQpgYGB7cn0NCnRyYWluPWRhdG9zW3BhcnRpY2lvbixdDQp0ZXN0PWRhdG9zWy1wYXJ0aWNpb24sXQ0KdGVzdA0KYGBgDQoNCmBgYHtyfQ0KI2Rpc2XDsW8gZGUgbGFzIGV0aXF1ZXRhcw0KcGxvdCh0cmFpbikNCmBgYA0KDQpgYGB7cn0NCmF0dGFjaCh0cmFpbikNCmBgYA0KYGBge3J9DQpwbG90KHRyYWluJGV4cGVyaWVuY2lhLHRyYWluJHNhbGFyaW9fYWN0dWFsKQ0KYGBgDQoNCmBgYHtyfQ0KY29yLnRlc3Qoc2FsYXJpb19hY3R1YWwsZXhwZXJpZW5jaWEpDQpgYGANCkNyZWFyIG1vZGVsbw0KDQpgYGB7cn0NCm1vZGVsbz1sbShzYWxhcmlvX2FjdHVhbH5leHBlcmllbmNpYSkNCm1vZGVsbw0KYGBgDQoNClJlc3VtZW4gZXN0YWRpc3RpY28gZGVsIG1vZGVsbw0KYGBge3J9DQpzdW1tYXJ5KG1vZGVsbykNCmBgYA0KYGBge3J9DQpleHBlcmllbmNpYT10ZXN0JGV4cGVyaWVuY2lhDQpleHBlcmllbmNpYQ0KDQpgYGANCkVudmlhciBkYXRhIGRlIGV4cGVyaWVuY2UgYWwgbW9kZWxvIHBhcmEgcHJlZGVjaXIgc2FsYXJpbyBhY3R1YWwNCg0KYGBge3J9DQoocHJlZGVjaXI9cHJlZGljdChtb2RlbG8sbGlzdChleHBlcmllbmNpYT10ZXN0JGV4cGVyaWVuY2lhKSkpDQpgYGANClZlciBtYXRyaXogdmFsb3IgcmVhbCB2cyBwcmVkZWNpcg0KYGBge3J9DQoobWF0cml6PWRhdGEuZnJhbWUodGVzdCRzYWxhcmlvX2FjdHVhbCxwcmVkZWNpcikpDQpgYGANCg0KYGBge3J9DQojZ3JhZmljbw0KcGxvdCh0ZXN0JGV4cGVyaWVuY2lhLHRlc3Qkc2FsYXJpb19hY3R1YWwscGNoPTIwKQ0KbGluZXModGVzdCRleHBlcmllbmNpYSxwcmVkZWNpcixjb2w9MikNCg0KYGBgDQoNCmBgYHtyfQ0KZXhwZXJpZW5jaWE9YygxMDA6MTUwKQ0KZXhwZXJpZW5jaWENCmBgYA0KYGBge3J9DQojZW52aWFyIGRhdGEgbnVldmENCnByZWRpY3QobW9kZWxvLGV4cGVyaWVuY2lhPWMoMTAwOjE1MCkpDQpwcmVkaWN0DQpgYGANCk1vZGVsbyBNdWx0aXZh
cmlhZG8gMQ0KYGBge3J9DQojICoqKioqIG1vZGVsbyBsaW5lYWwgbXVsdGlwbGUgKioqKioqKg0KbW9kZWxvPWxtKHRyYWluJHNhbGFyaW9fYWN0dWFsfnRyYWluJGV4cGVyaWVuY2lhK3RyYWluJGFudGlndWVkYWQrdHJhaW4kc2V4bykNCm1vZGVsbw0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShtb2RlbG8pDQpgYGANCk1vZGVsbyBNdWx0aXZhcmlhZG8gVmVyc2lvbiAyDQoNCkVudmlhciB0b2RhcyBsYSBldGlxdWV0YXMgZGUgbGFzIHZhcmlhYmxlcyBxdWUgbm8gZGVzY2FydGFtb3MuDQpgYGB7cn0NCm1vZGVsbzI9bG0odHJhaW4kc2FsYXJpb19hY3R1YWx+dHJhaW4kZXhwZXJpZW5jaWErdHJhaW4kYW50aWd1ZWRhZCkNCm1vZGVsbzINCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkobW9kZWxvMikNCmBgYA0KDQo=