# Attach pacman (provides p_load) and readxl (provides read_excel).
library(pacman)
library(readxl)
# Load the remaining packages through pacman in a single call.
# NOTE(review): in the code visible here only DT (datatable) appears to be
# used; the other packages may be needed elsewhere or by the rendering step -
# confirm before trimming the list.
p_load("base64enc", "htmltools", "mime", "xfun", "prettydoc", "ggplot2", "tidyr", "plotly", "DT")
# The read_excel() calls in this script use bare file names, so they are
# resolved relative to this working directory.
# NOTE(review): a hard-coded setwd() ties the script to one folder layout.
setwd("~/Stat206145")
# Import the 1590 column data set from its Excel workbook.
columna1590 <- read_excel(path = "columna1590.xlsx")
# Show the column names of the imported table.
colnames(columna1590)
## [1] "Recu1590" "Dias1590"
# Interactive table of the raw data.
datatable(data = columna1590)
# Scatterplot matrix, then the correlation matrix of the columns.
pairs(x = columna1590)
cor(x = columna1590)
## Recu1590 Dias1590
## Recu1590 1.0000000 0.9734581
## Dias1590 0.9734581 1.0000000
El comando básico es lm (linear models). El primer argumento de este comando es una fórmula y ~ x en la que se especifica cuál es la variable respuesta o dependiente (y) y cuál es la variable regresora o independiente (x). El segundo argumento, llamado data, especifica cuál es el conjunto de datos (data frame) en el que se encuentran las variables. El resultado lo guardamos en un objeto llamado reg1590. Este objeto es una lista que contiene toda la información relevante sobre el análisis. Mediante el comando summary obtenemos un resumen de los principales resultados:
# Least-squares fit with Recu1590 as response and Dias1590 as regressor.
reg1590 <- lm(formula = Recu1590 ~ Dias1590, data = columna1590)
# Print the residual quantiles, coefficient table and fit statistics.
summary(object = reg1590)
##
## Call:
## lm(formula = Recu1590 ~ Dias1590, data = columna1590)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5040 -1.0388 0.2099 1.2860 2.1407
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.426359 0.290122 4.916 3.5e-06 ***
## Dias1590 0.209008 0.004939 42.321 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.447 on 99 degrees of freedom
## Multiple R-squared: 0.9476, Adjusted R-squared: 0.9471
## F-statistic: 1791 on 1 and 99 DF, p-value: < 2.2e-16
Los parámetros de la ecuación de la recta de mínimos cuadrados que relaciona la cantidad de Recuperación de la columna en función de los Días vienen dados por la columna `Estimate` de la tabla `Coefficients` de la salida anterior. Por lo tanto, en este caso la ecuación de la recta de mínimos cuadrados es:
$$\widehat{\text{Recu1590}} = 1.426359 + 0.209008 \cdot \text{Dias1590}$$
Los siguientes comandos representan la nube de puntos y añaden la representación gráfica de la recta de mínimos cuadrados:
# Scatter plot of the observed data with the fitted least-squares line on top.
plot(columna1590$Dias1590, columna1590$Recu1590, xlab = "Dias1590", ylab = "Recu1590")
abline(reg1590)
# Regressor values 99 through 472 (374 rows) at which the fitted line is
# evaluated.
nuevas.recu1590 <- data.frame(Dias1590 = seq(99, 472))
# Call the predict() generic instead of the predict.lm method directly:
# dispatch on the "lm" class yields the same values and matches how the other
# fitted models in this file are queried.
predict(reg1590, nuevas.recu1590)
## 1 2 3 4 5 6 7 8
## 22.11813 22.32714 22.53614 22.74515 22.95416 23.16317 23.37217 23.58118
## 9 10 11 12 13 14 15 16
## 23.79019 23.99920 24.20821 24.41721 24.62622 24.83523 25.04424 25.25324
## 17 18 19 20 21 22 23 24
## 25.46225 25.67126 25.88027 26.08927 26.29828 26.50729 26.71630 26.92531
## 25 26 27 28 29 30 31 32
## 27.13431 27.34332 27.55233 27.76134 27.97034 28.17935 28.38836 28.59737
## 33 34 35 36 37 38 39 40
## 28.80638 29.01538 29.22439 29.43340 29.64241 29.85141 30.06042 30.26943
## 41 42 43 44 45 46 47 48
## 30.47844 30.68745 30.89645 31.10546 31.31447 31.52348 31.73248 31.94149
## 49 50 51 52 53 54 55 56
## 32.15050 32.35951 32.56852 32.77752 32.98653 33.19554 33.40455 33.61355
## 57 58 59 60 61 62 63 64
## 33.82256 34.03157 34.24058 34.44959 34.65859 34.86760 35.07661 35.28562
## 65 66 67 68 69 70 71 72
## 35.49462 35.70363 35.91264 36.12165 36.33066 36.53966 36.74867 36.95768
## 73 74 75 76 77 78 79 80
## 37.16669 37.37569 37.58470 37.79371 38.00272 38.21173 38.42073 38.62974
## 81 82 83 84 85 86 87 88
## 38.83875 39.04776 39.25676 39.46577 39.67478 39.88379 40.09279 40.30180
## 89 90 91 92 93 94 95 96
## 40.51081 40.71982 40.92883 41.13783 41.34684 41.55585 41.76486 41.97386
## 97 98 99 100 101 102 103 104
## 42.18287 42.39188 42.60089 42.80990 43.01890 43.22791 43.43692 43.64593
## 105 106 107 108 109 110 111 112
## 43.85493 44.06394 44.27295 44.48196 44.69097 44.89997 45.10898 45.31799
## 113 114 115 116 117 118 119 120
## 45.52700 45.73600 45.94501 46.15402 46.36303 46.57204 46.78104 46.99005
## 121 122 123 124 125 126 127 128
## 47.19906 47.40807 47.61707 47.82608 48.03509 48.24410 48.45311 48.66211
## 129 130 131 132 133 134 135 136
## 48.87112 49.08013 49.28914 49.49814 49.70715 49.91616 50.12517 50.33418
## 137 138 139 140 141 142 143 144
## 50.54318 50.75219 50.96120 51.17021 51.37921 51.58822 51.79723 52.00624
## 145 146 147 148 149 150 151 152
## 52.21524 52.42425 52.63326 52.84227 53.05128 53.26028 53.46929 53.67830
## 153 154 155 156 157 158 159 160
## 53.88731 54.09631 54.30532 54.51433 54.72334 54.93235 55.14135 55.35036
## 161 162 163 164 165 166 167 168
## 55.55937 55.76838 55.97738 56.18639 56.39540 56.60441 56.81342 57.02242
## 169 170 171 172 173 174 175 176
## 57.23143 57.44044 57.64945 57.85845 58.06746 58.27647 58.48548 58.69449
## 177 178 179 180 181 182 183 184
## 58.90349 59.11250 59.32151 59.53052 59.73952 59.94853 60.15754 60.36655
## 185 186 187 188 189 190 191 192
## 60.57556 60.78456 60.99357 61.20258 61.41159 61.62059 61.82960 62.03861
## 193 194 195 196 197 198 199 200
## 62.24762 62.45663 62.66563 62.87464 63.08365 63.29266 63.50166 63.71067
## 201 202 203 204 205 206 207 208
## 63.91968 64.12869 64.33770 64.54670 64.75571 64.96472 65.17373 65.38273
## 209 210 211 212 213 214 215 216
## 65.59174 65.80075 66.00976 66.21876 66.42777 66.63678 66.84579 67.05480
## 217 218 219 220 221 222 223 224
## 67.26380 67.47281 67.68182 67.89083 68.09983 68.30884 68.51785 68.72686
## 225 226 227 228 229 230 231 232
## 68.93587 69.14487 69.35388 69.56289 69.77190 69.98090 70.18991 70.39892
## 233 234 235 236 237 238 239 240
## 70.60793 70.81694 71.02594 71.23495 71.44396 71.65297 71.86197 72.07098
## 241 242 243 244 245 246 247 248
## 72.27999 72.48900 72.69801 72.90701 73.11602 73.32503 73.53404 73.74304
## 249 250 251 252 253 254 255 256
## 73.95205 74.16106 74.37007 74.57908 74.78808 74.99709 75.20610 75.41511
## 257 258 259 260 261 262 263 264
## 75.62411 75.83312 76.04213 76.25114 76.46015 76.66915 76.87816 77.08717
## 265 266 267 268 269 270 271 272
## 77.29618 77.50518 77.71419 77.92320 78.13221 78.34121 78.55022 78.75923
## 273 274 275 276 277 278 279 280
## 78.96824 79.17725 79.38625 79.59526 79.80427 80.01328 80.22228 80.43129
## 281 282 283 284 285 286 287 288
## 80.64030 80.84931 81.05832 81.26732 81.47633 81.68534 81.89435 82.10335
## 289 290 291 292 293 294 295 296
## 82.31236 82.52137 82.73038 82.93939 83.14839 83.35740 83.56641 83.77542
## 297 298 299 300 301 302 303 304
## 83.98442 84.19343 84.40244 84.61145 84.82046 85.02946 85.23847 85.44748
## 305 306 307 308 309 310 311 312
## 85.65649 85.86549 86.07450 86.28351 86.49252 86.70153 86.91053 87.11954
## 313 314 315 316 317 318 319 320
## 87.32855 87.53756 87.74656 87.95557 88.16458 88.37359 88.58260 88.79160
## 321 322 323 324 325 326 327 328
## 89.00061 89.20962 89.41863 89.62763 89.83664 90.04565 90.25466 90.46367
## 329 330 331 332 333 334 335 336
## 90.67267 90.88168 91.09069 91.29970 91.50870 91.71771 91.92672 92.13573
## 337 338 339 340 341 342 343 344
## 92.34473 92.55374 92.76275 92.97176 93.18077 93.38977 93.59878 93.80779
## 345 346 347 348 349 350 351 352
## 94.01680 94.22580 94.43481 94.64382 94.85283 95.06184 95.27084 95.47985
## 353 354 355 356 357 358 359 360
## 95.68886 95.89787 96.10687 96.31588 96.52489 96.73390 96.94291 97.15191
## 361 362 363 364 365 366 367 368
## 97.36092 97.56993 97.77894 97.98794 98.19695 98.40596 98.61497 98.82398
## 369 370 371 372 373 374
## 99.03298 99.24199 99.45100 99.66001 99.86901 100.07802
En función del rango de 99 a 472 días se predicen los 374 datos subsecuentes de nuevas recuperaciones, lo que quiere decir que, mediante este modelo, se tiene que seguir monitoreando aproximadamente 374 días más para lograr una recuperación del 100%.
# Import the 1575 column data set from its Excel workbook.
columna1575 <- read_excel(path = "columna1575.xlsx")
# Show the column names of the imported table.
colnames(columna1575)
## [1] "Recu1575" "Dias1575"
# Interactive table of the raw data.
datatable(data = columna1575)
# Scatterplot matrix, then the correlation matrix of the columns.
pairs(x = columna1575)
cor(x = columna1575)
## Recu1575 Dias1575
## Recu1575 1.0000 0.9241
## Dias1575 0.9241 1.0000
# Least-squares fit with Recu1575 as response and Dias1575 as regressor.
reg1575 <- lm(formula = Recu1575 ~ Dias1575, data = columna1575)
# Print the residual quantiles, coefficient table and fit statistics.
summary(object = reg1575)
##
## Call:
## lm(formula = Recu1575 ~ Dias1575, data = columna1575)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.0283 -3.5569 0.5089 5.8425 8.9745
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.638257 0.906414 32.70 <2e-16 ***
## Dias1575 0.222730 0.005456 40.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.658 on 285 degrees of freedom
## Multiple R-squared: 0.854, Adjusted R-squared: 0.8534
## F-statistic: 1667 on 1 and 285 DF, p-value: < 2.2e-16
# Scatter plot of the observed data; axis labels are set explicitly.
plot(
  x = columna1575[["Dias1575"]],
  y = columna1575[["Recu1575"]],
  xlab = "Dias1575",
  ylab = "Recu1575"
)
# Overlay the fitted regression line.
abline(reg = reg1575)
# Regressor values 288 through 316 at which the fitted line is evaluated.
nuevas.recu1575 <- data.frame(Dias1575 = 288:316)
predict(object = reg1575, newdata = nuevas.recu1575)
## 1 2 3 4 5 6 7 8
## 93.78447 94.00720 94.22993 94.45266 94.67539 94.89812 95.12085 95.34358
## 9 10 11 12 13 14 15 16
## 95.56631 95.78904 96.01177 96.23450 96.45723 96.67996 96.90269 97.12542
## 17 18 19 20 21 22 23 24
## 97.34815 97.57088 97.79361 98.01634 98.23907 98.46180 98.68453 98.90726
## 25 26 27 28 29
## 99.12999 99.35272 99.57545 99.79818 100.02091
# Import the 1470 column data set from its Excel workbook.
columna1470 <- read_excel(path = "columna1470.xlsx")
# Show the column names of the imported table.
colnames(columna1470)
## [1] "Recu1470" "Dias1470"
# Interactive table of the raw data.
datatable(data = columna1470)
# Scatterplot matrix, then the correlation matrix of the columns.
pairs(x = columna1470)
cor(x = columna1470)
## Recu1470 Dias1470
## Recu1470 1.000000 0.905505
## Dias1470 0.905505 1.000000
# Least-squares fit with Recu1470 as response and Dias1470 as regressor.
reg1470 <- lm(formula = Recu1470 ~ Dias1470, data = columna1470)
# Print the residual quantiles, coefficient table and fit statistics.
summary(object = reg1470)
##
## Call:
## lm(formula = Recu1470 ~ Dias1470, data = columna1470)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.235 -5.122 0.353 7.277 11.034
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.853005 0.885358 43.88 <2e-16 ***
## Dias1470 0.172105 0.004275 40.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.358 on 356 degrees of freedom
## Multiple R-squared: 0.8199, Adjusted R-squared: 0.8194
## F-statistic: 1621 on 1 and 356 DF, p-value: < 2.2e-16
# Scatter plot of the observed data; axis labels are set explicitly.
plot(
  x = columna1470[["Dias1470"]],
  y = columna1470[["Recu1470"]],
  xlab = "Dias1470",
  ylab = "Recu1470"
)
# Overlay the fitted regression line.
abline(reg = reg1470)
# Regressor values 367 and 368 at which the fitted line is evaluated.
nuevas.recu1470 <- data.frame(Dias1470 = 367:368)
predict(object = reg1470, newdata = nuevas.recu1470)
## 1 2
## 102.0156 102.1877
# Import the 1545 column data set from its Excel workbook.
columna1545 <- read_excel(path = "columna1545.xlsx")
# Show the column names of the imported table.
colnames(columna1545)
## [1] "Recu1545" "Dias1545"
# Interactive table of the raw data.
datatable(data = columna1545)
# Scatterplot matrix, then the correlation matrix of the columns.
pairs(x = columna1545)
cor(x = columna1545)
## Recu1545 Dias1545
## Recu1545 1.0000000 0.9000888
## Dias1545 0.9000888 1.0000000
# Least-squares fit with Recu1545 as response and Dias1545 as regressor.
reg1545 <- lm(formula = Recu1545 ~ Dias1545, data = columna1545)
# Print the residual quantiles, coefficient table and fit statistics.
summary(object = reg1545)
##
## Call:
## lm(formula = Recu1545 ~ Dias1545, data = columna1545)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.673 -3.356 2.518 4.772 6.065
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.795188 0.725500 60.37 <2e-16 ***
## Dias1545 0.135042 0.003426 39.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.926 on 364 degrees of freedom
## Multiple R-squared: 0.8102, Adjusted R-squared: 0.8096
## F-statistic: 1553 on 1 and 364 DF, p-value: < 2.2e-16
# Scatter plot of the observed data; axis labels are set explicitly.
plot(
  x = columna1545[["Dias1545"]],
  y = columna1545[["Recu1545"]],
  xlab = "Dias1545",
  ylab = "Recu1545"
)
# Overlay the fitted regression line.
abline(reg = reg1545)
# Regressor values 374 through 417 at which the fitted line is evaluated.
nuevas.recu1545 <- data.frame(Dias1545 = 374:417)
predict(object = reg1545, newdata = nuevas.recu1545)
## 1 2 3 4 5 6 7 8
## 94.30107 94.43611 94.57115 94.70619 94.84124 94.97628 95.11132 95.24636
## 9 10 11 12 13 14 15 16
## 95.38141 95.51645 95.65149 95.78653 95.92158 96.05662 96.19166 96.32670
## 17 18 19 20 21 22 23 24
## 96.46175 96.59679 96.73183 96.86687 97.00191 97.13696 97.27200 97.40704
## 25 26 27 28 29 30 31 32
## 97.54208 97.67713 97.81217 97.94721 98.08225 98.21730 98.35234 98.48738
## 33 34 35 36 37 38 39 40
## 98.62242 98.75747 98.89251 99.02755 99.16259 99.29764 99.43268 99.56772
## 41 42 43 44
## 99.70276 99.83781 99.97285 100.10789