library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# library(plotly) # no se está usando
library(knitr)
library(PerformanceAnalytics) # Para correlaciones gráficas
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(caret) # Para particionar
## Loading required package: lattice
library(Metrics) # Para determinar rmse
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
library(PerformanceAnalytics) # Para cor gráfica
# Download the advertising dataset (CSV) from the course repository and
# load it into a data frame.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/Analisis-Inteligente-de-datos/main/datos/Advertising_Web.csv"
)
# Show the column types and the first values of every column.
str(object = datos)
## 'data.frame': 200 obs. of 7 variables:
## $ X.1 : int 1 2 3 4 5 6 7 8 9 10 ...
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ TV : num 230.1 44.5 17.2 151.5 180.8 ...
## $ Radio : num 37.8 39.3 45.9 41.3 10.8 48.9 32.8 19.6 2.1 2.6 ...
## $ Newspaper: num 69.2 45.1 69.3 58.5 58.4 75 23.5 11.6 1 21.2 ...
## $ Web : num 306.6 302.7 49.5 257.8 195.7 ...
## $ Sales : num 22.1 10.4 9.3 18.5 12.9 7.2 11.8 13.2 4.8 10.6 ...
# Descriptive statistics (min, quartiles, median, mean, max) per column.
summary(object = datos)
## X.1 X TV Radio
## Min. : 1.00 Min. : 1.00 Min. : 0.70 Min. : 0.000
## 1st Qu.: 50.75 1st Qu.: 50.75 1st Qu.: 74.38 1st Qu.: 9.975
## Median :100.50 Median :100.50 Median :149.75 Median :22.900
## Mean :100.50 Mean :100.50 Mean :147.04 Mean :23.264
## 3rd Qu.:150.25 3rd Qu.:150.25 3rd Qu.:218.82 3rd Qu.:36.525
## Max. :200.00 Max. :200.00 Max. :296.40 Max. :49.600
## Newspaper Web Sales
## Min. : 0.30 Min. : 4.308 Min. : 1.60
## 1st Qu.: 12.75 1st Qu.: 99.049 1st Qu.:10.38
## Median : 25.75 Median :156.862 Median :12.90
## Mean : 30.55 Mean :159.587 Mean :14.02
## 3rd Qu.: 45.10 3rd Qu.:212.312 3rd Qu.:17.40
## Max. :114.00 Max. :358.247 Max. :27.00
# Keep only the four advertising channels and the response variable; the
# two index columns that came with the CSV (X.1 and X) are dropped.
datos <- datos %>%
  select(TV, Radio, Newspaper, Web, Sales)
# Graphical correlation matrix of the remaining columns
# (numeric-only alternative: cor(datos)).
chart.Correlation(datos)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
# Fix the RNG seed so the train/validation split is reproducible.
set.seed(1550)
n <- nrow(datos) # row count; not referenced again in the visible script
# Row indices of the training partition: 70% of the rows, drawn once by
# caret::createDataPartition() using Sales as the outcome.
entrena <- createDataPartition(
  y = datos$Sales,
  times = 1,
  p = 0.70,
  list = FALSE
)
# Validation data: every row that was NOT selected for training.
datos.validacion <- datos[-entrena, ]
# Training data: the selected rows, all columns.
datos.entrenamiento <- datos[entrena, ]
datos.entrenamiento
## TV Radio Newspaper Web Sales
## 3 17.2 45.9 69.3 49.498908 9.3
## 4 151.5 41.3 58.5 257.816893 18.5
## 5 180.8 10.8 58.4 195.660076 12.9
## 7 57.5 32.8 23.5 246.811598 11.8
## 9 8.6 2.1 1.0 144.617385 4.8
## 10 199.8 2.6 21.2 111.272264 10.6
## 15 204.1 32.9 46.0 245.774960 19.0
## 16 195.4 47.7 52.9 148.095134 22.4
## 17 67.8 36.6 114.0 202.638903 12.5
## 23 13.2 15.9 49.6 219.882776 5.6
## 24 228.3 16.9 26.2 51.170073 15.5
## 25 62.3 12.6 18.3 256.965240 9.7
## 26 262.9 3.5 19.5 160.562859 12.0
## 27 142.9 29.3 12.6 275.512483 15.0
## 28 240.1 16.7 22.9 228.157437 15.9
## 29 248.8 27.1 22.9 318.644967 18.9
## 31 292.9 28.3 43.2 121.464347 21.4
## 32 112.9 17.4 38.6 295.883989 11.9
## 33 97.2 1.5 30.0 139.781089 9.6
## 35 95.7 1.4 7.4 321.174609 9.5
## 36 290.7 4.1 8.5 181.983424 12.8
## 37 266.9 43.8 5.0 96.316829 25.4
## 39 43.1 26.7 35.1 122.753591 10.1
## 42 177.0 33.4 38.7 147.859324 17.1
## 44 206.9 8.4 26.4 213.609610 12.9
## 47 89.7 9.9 35.7 216.504015 10.6
## 48 239.9 41.5 18.5 105.962913 23.2
## 49 227.2 15.8 49.9 75.269182 14.8
## 51 199.8 3.1 34.6 151.990733 11.4
## 52 100.4 9.6 3.6 41.335255 10.7
## 54 182.6 46.2 58.7 176.050052 21.2
## 56 198.9 49.4 60.0 204.418927 23.7
## 57 7.3 28.1 41.4 121.328525 5.5
## 59 210.8 49.6 37.7 32.411740 23.8
## 60 210.7 29.5 9.3 138.895554 18.4
## 62 261.3 42.7 54.7 224.832039 24.2
## 63 239.3 15.5 27.3 312.209555 15.7
## 64 102.7 29.6 8.4 183.009750 14.0
## 65 131.1 42.8 28.9 124.382228 18.0
## 66 69.0 9.3 0.9 205.993485 9.3
## 67 31.5 24.6 2.2 216.471397 9.5
## 68 139.3 14.5 10.2 207.661990 13.4
## 69 237.4 27.5 11.0 291.548597 18.9
## 70 216.8 43.9 27.2 149.396103 22.3
## 72 109.8 14.3 31.7 151.990733 12.4
## 74 129.4 5.7 31.3 61.306191 11.0
## 75 213.4 24.6 13.1 156.284261 17.0
## 76 16.9 43.7 89.4 70.234282 8.7
## 78 120.5 28.5 14.2 97.455125 14.2
## 79 5.4 29.9 9.4 4.308085 5.3
## 80 116.0 7.7 23.1 120.053504 11.0
## 81 76.4 26.7 22.3 268.151320 11.8
## 82 239.8 4.1 36.9 169.946395 12.3
## 84 68.4 44.5 35.6 78.393104 13.6
## 85 213.5 43.0 33.8 191.868374 21.7
## 86 193.2 18.4 65.7 223.578793 15.2
## 88 110.7 40.6 63.2 107.430521 16.0
## 89 88.3 25.5 73.4 260.101928 12.9
## 90 109.8 47.8 51.4 162.727890 16.7
## 91 134.3 4.9 9.3 258.355488 11.2
## 92 28.6 1.5 33.0 172.467947 7.3
## 93 217.7 33.5 59.0 150.962754 19.4
## 94 250.9 36.5 72.3 202.102158 22.2
## 98 184.9 21.0 22.0 253.300721 15.5
## 101 222.4 4.3 49.8 125.627143 11.7
## 102 296.4 36.3 100.9 61.005251 23.8
## 103 280.2 10.1 21.4 49.808451 14.8
## 104 187.9 17.2 17.9 97.088630 14.7
## 106 137.9 46.4 59.0 138.762632 19.2
## 107 25.0 11.0 29.7 15.938208 7.2
## 111 225.8 8.2 56.5 95.185762 13.4
## 112 241.7 38.0 23.2 180.511528 21.8
## 113 175.7 15.4 2.4 71.682551 14.1
## 115 78.2 46.8 34.5 76.770428 14.6
## 117 139.2 14.3 25.6 234.183118 12.2
## 118 76.4 0.8 14.8 234.384501 9.4
## 119 125.7 36.9 79.2 187.840415 15.9
## 120 19.4 16.0 22.3 112.892609 6.6
## 122 18.8 21.7 50.4 63.854924 7.0
## 123 224.0 2.4 15.6 89.515821 11.6
## 124 123.1 34.6 12.4 15.757191 15.2
## 125 229.5 32.3 74.2 88.080721 19.7
## 126 87.2 11.8 25.9 121.090982 10.6
## 127 7.8 38.9 50.6 209.471977 6.6
## 128 80.2 0.0 9.2 358.247042 8.8
## 129 220.3 49.0 3.2 187.437060 24.7
## 133 8.4 27.2 2.1 238.055219 5.7
## 135 36.9 38.6 65.6 81.246748 10.8
## 136 48.3 47.0 8.5 61.227323 11.6
## 137 25.6 39.0 9.3 77.230797 9.5
## 139 43.0 25.9 20.5 181.368740 9.6
## 140 184.9 43.9 1.7 106.253829 20.7
## 141 73.4 17.0 12.9 174.772137 10.9
## 142 193.7 35.4 75.6 152.284937 19.2
## 143 220.5 33.2 37.9 6.007436 20.1
## 145 96.2 14.8 38.9 157.440047 11.4
## 146 140.3 1.9 9.0 231.883385 10.3
## 147 240.1 7.3 8.7 23.496943 13.2
## 148 243.2 49.0 44.3 151.990733 25.4
## 149 38.0 40.3 11.9 75.207978 10.9
## 150 44.7 25.8 20.6 235.622449 10.1
## 151 280.7 13.9 37.0 81.040617 16.1
## 152 121.0 8.4 48.7 103.255212 11.6
## 155 187.8 21.1 9.5 63.071208 15.6
## 156 4.1 11.6 5.7 113.270712 3.2
## 157 93.9 43.5 50.5 74.361939 15.3
## 159 11.7 36.9 45.2 185.866079 7.3
## 160 131.7 18.4 34.6 196.370304 12.9
## 161 172.5 18.1 30.7 207.496801 14.4
## 162 85.7 35.8 49.3 188.933530 13.3
## 163 188.4 18.1 25.6 158.461520 14.9
## 164 163.5 36.8 7.4 82.228794 18.0
## 166 234.5 3.4 84.8 135.024909 11.9
## 167 17.9 37.6 21.6 99.936953 8.0
## 169 215.4 23.6 57.6 203.431267 17.1
## 171 50.0 11.6 18.4 64.014805 8.4
## 172 164.5 20.9 47.4 96.180391 14.5
## 173 19.6 20.1 17.0 155.583662 7.6
## 174 168.4 7.1 12.8 218.180829 11.7
## 176 276.9 48.9 41.8 151.990733 27.0
## 177 248.4 30.2 20.3 163.852044 20.2
## 178 170.2 7.8 35.2 104.917344 11.7
## 179 276.7 2.3 23.7 137.323772 11.8
## 180 165.6 10.0 17.6 151.990733 12.6
## 181 156.6 2.6 8.3 122.116470 10.5
## 184 287.6 43.0 71.8 154.309725 26.2
## 185 253.8 21.3 30.0 181.579051 17.6
## 186 205.0 45.1 19.6 208.692690 22.6
## 187 139.5 2.1 26.6 236.744035 10.3
## 188 191.1 28.7 18.2 239.275713 17.3
## 189 286.0 13.9 3.7 151.990733 15.9
## 190 18.7 12.1 23.4 222.906951 6.7
## 191 39.5 41.1 5.8 219.890583 10.8
## 192 75.5 10.8 6.0 301.481194 9.9
## 193 17.2 4.1 31.6 265.028644 5.9
## 194 166.8 42.0 3.6 192.246211 19.6
## 195 149.7 35.6 6.0 99.579981 17.3
## 196 38.2 3.7 13.8 248.841073 7.6
## 197 94.2 4.9 8.1 118.041856 9.7
## 198 177.0 9.3 6.4 213.274671 12.8
## 199 283.6 42.0 66.2 237.498063 25.5
## 200 232.1 8.6 8.7 151.990733 13.4
# Display the validation set (the rows held out from training).
datos.validacion
## TV Radio Newspaper Web Sales
## 1 230.1 37.8 69.2 306.63475 22.1
## 2 44.5 39.3 45.1 302.65307 10.4
## 6 8.7 48.9 75.0 22.07240 7.2
## 8 120.2 19.6 11.6 229.97146 13.2
## 11 66.1 5.8 24.2 45.35903 8.6
## 12 214.7 24.0 4.0 164.97176 17.4
## 13 23.8 35.1 65.9 87.92109 9.2
## 14 97.5 7.6 7.2 173.65804 9.7
## 18 281.4 39.6 55.8 41.75531 24.4
## 19 69.2 20.5 18.3 210.48991 11.3
## 20 147.3 23.9 19.1 268.73538 14.6
## 21 218.4 27.7 53.4 59.96055 18.0
## 22 237.4 5.1 23.5 296.95207 12.5
## 30 70.6 16.0 40.8 61.32436 10.5
## 34 265.6 20.0 0.3 94.20726 17.4
## 38 74.7 49.4 45.7 56.53622 14.7
## 40 228.0 37.7 32.0 196.48327 21.5
## 41 202.5 22.3 31.6 88.21282 16.6
## 43 293.6 27.7 1.8 174.71682 20.7
## 45 25.1 25.7 43.3 245.76441 8.5
## 46 175.1 22.5 31.5 62.80926 14.9
## 50 66.9 11.7 36.8 205.25350 9.7
## 53 216.4 41.7 39.6 161.80251 22.6
## 55 262.7 28.8 15.9 324.61518 20.2
## 58 136.2 19.2 16.6 60.45435 13.2
## 61 53.5 2.0 21.4 39.21715 8.1
## 71 199.1 30.6 38.7 210.75214 18.3
## 73 26.8 33.0 19.3 211.99091 8.8
## 77 27.5 1.6 20.7 117.10193 6.9
## 83 75.3 20.3 32.5 231.20983 11.3
## 87 76.3 27.5 16.0 193.83089 12.0
## 95 107.4 14.0 10.9 151.99073 11.5
## 96 163.3 31.6 52.9 155.59488 16.9
## 97 197.6 3.5 5.9 139.83054 11.7
## 99 289.7 42.3 51.2 183.56958 25.4
## 100 135.2 41.7 45.9 40.60035 17.2
## 105 238.2 34.3 5.3 112.15549 20.7
## 108 90.4 0.3 23.2 261.38088 8.7
## 109 13.1 0.4 25.6 252.39135 5.3
## 110 255.4 26.9 5.5 273.45413 19.8
## 114 209.6 20.6 10.7 42.88380 15.9
## 116 75.1 35.0 52.7 204.27671 12.6
## 121 141.3 26.8 46.2 65.52546 15.5
## 130 59.6 12.0 43.1 197.19655 9.7
## 131 0.7 39.6 8.7 162.90259 1.6
## 132 265.2 2.9 43.0 172.15666 12.7
## 134 219.8 33.5 45.1 171.47802 19.6
## 138 273.7 28.9 59.7 288.26061 20.8
## 144 104.6 5.7 34.4 336.57109 10.4
## 153 197.6 23.3 14.2 159.52256 16.6
## 154 171.3 39.7 37.7 155.01622 19.0
## 158 149.8 1.3 24.3 145.80321 10.1
## 165 117.2 14.7 5.4 109.00876 11.9
## 168 206.8 5.2 19.4 115.37196 12.2
## 170 284.3 10.6 6.4 157.90011 15.0
## 175 222.4 3.4 13.1 144.52566 11.5
## 182 218.5 5.4 27.4 162.38749 12.2
## 183 56.2 5.7 29.7 42.19929 8.7
# Multiple linear regression fitted on the training rows only: Sales is
# the response; TV, Radio, Newspaper and Web are the predictors.
modelo_rm <- lm(
  Sales ~ TV + Radio + Newspaper + Web,
  data = datos.entrenamiento
)
# Coefficient table, residual summary, R-squared and F-statistic.
summary(modelo_rm)
##
## Call:
## lm(formula = Sales ~ TV + Radio + Newspaper + Web, data = datos.entrenamiento)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4172 -0.7633 0.1705 1.0893 3.1139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.710666 0.457663 3.738 0.000272 ***
## TV 0.046002 0.001520 30.258 < 2e-16 ***
## Radio 0.204716 0.009043 22.639 < 2e-16 ***
## Newspaper -0.004891 0.005964 -0.820 0.413610
## Web 0.005880 0.001760 3.341 0.001075 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.531 on 137 degrees of freedom
## Multiple R-squared: 0.9183, Adjusted R-squared: 0.9159
## F-statistic: 384.9 on 4 and 137 DF, p-value: < 2.2e-16
# Predict Sales for the held-out validation rows with the fitted model.
predicciones <- predict(modelo_rm, newdata = datos.validacion)
# Put the observed values and the predictions side by side, one row per
# validation observation.
comparaciones <- data.frame(datos.validacion, predicciones = predicciones)
comparaciones
## TV Radio Newspaper Web Sales predicciones
## 1 230.1 37.8 69.2 306.63475 22.1 21.498579
## 2 44.5 39.3 45.1 302.65307 10.4 13.362165
## 6 8.7 48.9 75.0 22.07240 7.2 11.884468
## 8 120.2 19.6 11.6 229.97146 13.2 12.548066
## 11 66.1 5.8 24.2 45.35903 8.6 6.087101
## 12 214.7 24.0 4.0 164.97176 17.4 17.450957
## 13 23.8 35.1 65.9 87.92109 9.2 10.185723
## 14 97.5 7.6 7.2 173.65804 9.7 8.737617
## 18 281.4 39.6 55.8 41.75531 24.4 22.734967
## 19 69.2 20.5 18.3 210.48991 11.3 10.238890
## 20 147.3 23.9 19.1 268.73538 14.6 14.866253
## 21 218.4 27.7 53.4 59.96055 18.0 17.519514
## 22 237.4 5.1 23.5 296.95207 12.5 15.306756
## 30 70.6 16.0 40.8 61.32436 10.5 8.394903
## 34 265.6 20.0 0.3 94.20726 17.4 18.575577
## 38 74.7 49.4 45.7 56.53622 14.7 15.368911
## 40 228.0 37.7 32.0 196.48327 21.5 20.915739
## 41 202.5 22.3 31.6 88.21282 16.6 15.955369
## 43 293.6 27.7 1.8 174.71682 20.7 21.906019
## 45 25.1 25.7 43.3 245.76441 8.5 9.359876
## 46 175.1 22.5 31.5 62.80926 14.9 14.586972
## 50 66.9 11.7 36.8 205.25350 9.7 8.210308
## 53 216.4 41.7 39.6 161.80251 22.6 20.959882
## 55 262.7 28.8 15.9 324.61518 20.2 21.522213
## 58 136.2 19.2 16.6 60.45435 13.2 12.180965
## 61 53.5 2.0 21.4 39.21715 8.1 4.707135
## 71 199.1 30.6 38.7 210.75214 18.3 18.183933
## 73 26.8 33.0 19.3 211.99091 8.8 10.851298
## 77 27.5 1.6 20.7 117.10193 6.9 3.890599
## 83 75.3 20.3 32.5 231.20983 11.3 10.530943
## 87 76.3 27.5 16.0 193.83089 12.0 11.911808
## 95 107.4 14.0 10.9 151.99073 11.5 10.357715
## 96 163.3 31.6 52.9 155.59488 16.9 16.347996
## 97 197.6 3.5 5.9 139.83054 11.7 12.310515
## 99 289.7 42.3 51.2 183.56958 25.4 24.525908
## 100 135.2 41.7 45.9 40.60035 17.2 16.481026
## 105 238.2 34.3 5.3 112.15549 20.7 20.323651
## 108 90.4 0.3 23.2 261.38088 8.7 7.354144
## 109 13.1 0.4 25.6 252.39135 5.3 3.754072
## 110 255.4 26.9 5.5 273.45413 19.8 20.547470
## 114 209.6 20.6 10.7 42.88380 15.9 15.769644
## 116 75.1 35.0 52.7 204.27671 12.6 13.273901
## 121 141.3 26.8 46.2 65.52546 15.5 13.856462
## 130 59.6 12.0 43.1 197.19655 9.7 7.857720
## 131 0.7 39.6 8.7 162.90259 1.6 10.764973
## 132 265.2 2.9 43.0 172.15666 12.7 15.306039
## 134 219.8 33.5 45.1 171.47802 19.6 19.467608
## 138 273.7 28.9 59.7 288.26061 20.8 21.620708
## 144 104.6 5.7 34.4 336.57109 10.4 9.500191
## 153 197.6 23.3 14.2 159.52256 16.6 16.439093
## 154 171.3 39.7 37.7 155.01622 19.0 18.445153
## 158 149.8 1.3 24.3 145.80321 10.1 9.606375
## 165 117.2 14.7 5.4 109.00876 11.9 10.725994
## 168 206.8 5.2 19.4 115.37196 12.2 12.871901
## 170 284.3 10.6 6.4 157.90011 15.0 17.856170
## 175 222.4 3.4 13.1 144.52566 11.5 13.423283
## 182 218.5 5.4 27.4 162.38749 12.2 13.688397
## 183 56.2 5.7 29.7 42.19929 8.7 5.565730
# Root mean squared error between observed and predicted Sales on the
# validation rows.
# NOTE: the result is stored under the same name as the Metrics function,
# so the call is namespace-qualified to make clear which `rmse` is invoked.
rmse <- with(
  comparaciones,
  Metrics::rmse(actual = Sales, predicted = predicciones)
)
rmse
## [1] 1.980575
# Line chart comparing observed Sales (blue) with the model's predictions
# (yellow) across the validation rows, in the row order of `comparaciones`.
# seq_len() supplies the x positions instead of 1:nrow(), which would yield
# c(1, 0) for an empty data frame. An explicit x-axis label replaces the raw
# R expression that ggplot2 would otherwise print on the axis, and the typo
# in the title ("Adverstising") is corrected.
ggplot(
  data = comparaciones,
  mapping = aes(x = seq_len(nrow(comparaciones)))
) +
  geom_line(mapping = aes(y = Sales), colour = "blue") +
  geom_line(mapping = aes(y = predicciones), colour = "yellow") +
  xlab("Observación") +
  ggtitle(label = "Valores reales vs predichos Advertising")
# Two hypothetical advertising budgets to score with the fitted model.
# Element i of each vector belongs to scenario i.
Web <- c(120, 145)
Newspaper <- c(80, 90)
Radio <- c(60, 40)
TV <- c(140, 160)
# Assemble the scenarios using the same column names (and order) as the
# predictors in the training data.
nuevos <- data.frame(
  TV = TV,
  Radio = Radio,
  Newspaper = Newspaper,
  Web = Web
)
nuevos
## TV Radio Newspaper Web
## 1 140 60 80 120
## 2 160 40 90 145
# Predicted Sales for the two new budget scenarios in `nuevos`.
Y.predicciones <- predict(modelo_rm, newdata = nuevos)
Y.predicciones
## 1 2
## 20.74824 17.67205
Una empresa necesita conocer la relación de sus ventas con la cantidad de dinero invertido en distintos medios.
Se tiene un total de 200 observaciones. Las variables de interés son TV, Radio, Newspaper, Web y Sales.
Las variables independientes son TV, Radio, Newspaper y Web, la variable dependiente es Sales.
Se entrenará y validará con un 70% y 30% de los datos para el entrenamiento y la validación, respectivamente. La semilla a utilizar es 1550.
TV, Radio y Web tienen una confiabilidad mayor al 90%
TV: 0.046002 con un 99.9999%; Radio: 0.204716 con un 99.9999%; Newspaper: -0.004891 con un 58.639%; Web: 0.005880 con un 99.8925%.
El R cuadrado ajustado tiene un valor de 0.9159, lo cual significa que el modelo explica aproximadamente el 91.59% de la variabilidad de Sales en los datos de entrenamiento.
El RMSE tiene un valor de 1.980575 y representa el error típico de las predicciones sobre los datos de validación, expresado en las mismas unidades que Sales.
Sí, este bien podría ser una regresión lineal múltiple con más de una variable dependiente o más variables independientes.
Según los datos de correlación, el R Square y el RMSE, puedo concluir que este modelo, con la semilla 1550, tiene un grado de certeza muy alto, perfecto para su posterior uso en predicciones, simulaciones, etc.