require(ggplot2)
require(plotly)
require(datarium)
# Load the `marketing` data set into the workspace and preview its first rows.
data(list = "marketing")
head(marketing, n = 6L)
## youtube facebook newspaper sales
## 1 276.12 45.36 83.04 26.52
## 2 53.40 47.16 54.12 12.48
## 3 20.64 55.08 83.16 11.16
## 4 181.80 49.56 70.20 22.20
## 5 216.96 12.96 70.08 15.48
## 6 10.44 58.68 90.00 8.64
# Print every row of the data set for a full visual inspection.
print(marketing)
## youtube facebook newspaper sales
## 1 276.12 45.36 83.04 26.52
## 2 53.40 47.16 54.12 12.48
## 3 20.64 55.08 83.16 11.16
## 4 181.80 49.56 70.20 22.20
## 5 216.96 12.96 70.08 15.48
## 6 10.44 58.68 90.00 8.64
## 7 69.00 39.36 28.20 14.16
## 8 144.24 23.52 13.92 15.84
## 9 10.32 2.52 1.20 5.76
## 10 239.76 3.12 25.44 12.72
## 11 79.32 6.96 29.04 10.32
## 12 257.64 28.80 4.80 20.88
## 13 28.56 42.12 79.08 11.04
## 14 117.00 9.12 8.64 11.64
## 15 244.92 39.48 55.20 22.80
## 16 234.48 57.24 63.48 26.88
## 17 81.36 43.92 136.80 15.00
## 18 337.68 47.52 66.96 29.28
## 19 83.04 24.60 21.96 13.56
## 20 176.76 28.68 22.92 17.52
## 21 262.08 33.24 64.08 21.60
## 22 284.88 6.12 28.20 15.00
## 23 15.84 19.08 59.52 6.72
## 24 273.96 20.28 31.44 18.60
## 25 74.76 15.12 21.96 11.64
## 26 315.48 4.20 23.40 14.40
## 27 171.48 35.16 15.12 18.00
## 28 288.12 20.04 27.48 19.08
## 29 298.56 32.52 27.48 22.68
## 30 84.72 19.20 48.96 12.60
## 31 351.48 33.96 51.84 25.68
## 32 135.48 20.88 46.32 14.28
## 33 116.64 1.80 36.00 11.52
## 34 318.72 24.00 0.36 20.88
## 35 114.84 1.68 8.88 11.40
## 36 348.84 4.92 10.20 15.36
## 37 320.28 52.56 6.00 30.48
## 38 89.64 59.28 54.84 17.64
## 39 51.72 32.04 42.12 12.12
## 40 273.60 45.24 38.40 25.80
## 41 243.00 26.76 37.92 19.92
## 42 212.40 40.08 46.44 20.52
## 43 352.32 33.24 2.16 24.84
## 44 248.28 10.08 31.68 15.48
## 45 30.12 30.84 51.96 10.20
## 46 210.12 27.00 37.80 17.88
## 47 107.64 11.88 42.84 12.72
## 48 287.88 49.80 22.20 27.84
## 49 272.64 18.96 59.88 17.76
## 50 80.28 14.04 44.16 11.64
## 51 239.76 3.72 41.52 13.68
## 52 120.48 11.52 4.32 12.84
## 53 259.68 50.04 47.52 27.12
## 54 219.12 55.44 70.44 25.44
## 55 315.24 34.56 19.08 24.24
## 56 238.68 59.28 72.00 28.44
## 57 8.76 33.72 49.68 6.60
## 58 163.44 23.04 19.92 15.84
## 59 252.96 59.52 45.24 28.56
## 60 252.84 35.40 11.16 22.08
## 61 64.20 2.40 25.68 9.72
## 62 313.56 51.24 65.64 29.04
## 63 287.16 18.60 32.76 18.84
## 64 123.24 35.52 10.08 16.80
## 65 157.32 51.36 34.68 21.60
## 66 82.80 11.16 1.08 11.16
## 67 37.80 29.52 2.64 11.40
## 68 167.16 17.40 12.24 16.08
## 69 284.88 33.00 13.20 22.68
## 70 260.16 52.68 32.64 26.76
## 71 238.92 36.72 46.44 21.96
## 72 131.76 17.16 38.04 14.88
## 73 32.16 39.60 23.16 10.56
## 74 155.28 6.84 37.56 13.20
## 75 256.08 29.52 15.72 20.40
## 76 20.28 52.44 107.28 10.44
## 77 33.00 1.92 24.84 8.28
## 78 144.60 34.20 17.04 17.04
## 79 6.48 35.88 11.28 6.36
## 80 139.20 9.24 27.72 13.20
## 81 91.68 32.04 26.76 14.16
## 82 287.76 4.92 44.28 14.76
## 83 90.36 24.36 39.00 13.56
## 84 82.08 53.40 42.72 16.32
## 85 256.20 51.60 40.56 26.04
## 86 231.84 22.08 78.84 18.24
## 87 91.56 33.00 19.20 14.40
## 88 132.84 48.72 75.84 19.20
## 89 105.96 30.60 88.08 15.48
## 90 131.76 57.36 61.68 20.04
## 91 161.16 5.88 11.16 13.44
## 92 34.32 1.80 39.60 8.76
## 93 261.24 40.20 70.80 23.28
## 94 301.08 43.80 86.76 26.64
## 95 128.88 16.80 13.08 13.80
## 96 195.96 37.92 63.48 20.28
## 97 237.12 4.20 7.08 14.04
## 98 221.88 25.20 26.40 18.60
## 99 347.64 50.76 61.44 30.48
## 100 162.24 50.04 55.08 20.64
## 101 266.88 5.16 59.76 14.04
## 102 355.68 43.56 121.08 28.56
## 103 336.24 12.12 25.68 17.76
## 104 225.48 20.64 21.48 17.64
## 105 285.84 41.16 6.36 24.84
## 106 165.48 55.68 70.80 23.04
## 107 30.00 13.20 35.64 8.64
## 108 108.48 0.36 27.84 10.44
## 109 15.72 0.48 30.72 6.36
## 110 306.48 32.28 6.60 23.76
## 111 270.96 9.84 67.80 16.08
## 112 290.04 45.60 27.84 26.16
## 113 210.84 18.48 2.88 16.92
## 114 251.52 24.72 12.84 19.08
## 115 93.84 56.16 41.40 17.52
## 116 90.12 42.00 63.24 15.12
## 117 167.04 17.16 30.72 14.64
## 118 91.68 0.96 17.76 11.28
## 119 150.84 44.28 95.04 19.08
## 120 23.28 19.20 26.76 7.92
## 121 169.56 32.16 55.44 18.60
## 122 22.56 26.04 60.48 8.40
## 123 268.80 2.88 18.72 13.92
## 124 147.72 41.52 14.88 18.24
## 125 275.40 38.76 89.04 23.64
## 126 104.64 14.16 31.08 12.72
## 127 9.36 46.68 60.72 7.92
## 128 96.24 0.00 11.04 10.56
## 129 264.36 58.80 3.84 29.64
## 130 71.52 14.40 51.72 11.64
## 131 0.84 47.52 10.44 1.92
## 132 318.24 3.48 51.60 15.24
## 133 10.08 32.64 2.52 6.84
## 134 263.76 40.20 54.12 23.52
## 135 44.28 46.32 78.72 12.96
## 136 57.96 56.40 10.20 13.92
## 137 30.72 46.80 11.16 11.40
## 138 328.44 34.68 71.64 24.96
## 139 51.60 31.08 24.60 11.52
## 140 221.88 52.68 2.04 24.84
## 141 88.08 20.40 15.48 13.08
## 142 232.44 42.48 90.72 23.04
## 143 264.60 39.84 45.48 24.12
## 144 125.52 6.84 41.28 12.48
## 145 115.44 17.76 46.68 13.68
## 146 168.36 2.28 10.80 12.36
## 147 288.12 8.76 10.44 15.84
## 148 291.84 58.80 53.16 30.48
## 149 45.60 48.36 14.28 13.08
## 150 53.64 30.96 24.72 12.12
## 151 336.84 16.68 44.40 19.32
## 152 145.20 10.08 58.44 13.92
## 153 237.12 27.96 17.04 19.92
## 154 205.56 47.64 45.24 22.80
## 155 225.36 25.32 11.40 18.72
## 156 4.92 13.92 6.84 3.84
## 157 112.68 52.20 60.60 18.36
## 158 179.76 1.56 29.16 12.12
## 159 14.04 44.28 54.24 8.76
## 160 158.04 22.08 41.52 15.48
## 161 207.00 21.72 36.84 17.28
## 162 102.84 42.96 59.16 15.96
## 163 226.08 21.72 30.72 17.88
## 164 196.20 44.16 8.88 21.60
## 165 140.64 17.64 6.48 14.28
## 166 281.40 4.08 101.76 14.28
## 167 21.48 45.12 25.92 9.60
## 168 248.16 6.24 23.28 14.64
## 169 258.48 28.32 69.12 20.52
## 170 341.16 12.72 7.68 18.00
## 171 60.00 13.92 22.08 10.08
## 172 197.40 25.08 56.88 17.40
## 173 23.52 24.12 20.40 9.12
## 174 202.08 8.52 15.36 14.04
## 175 266.88 4.08 15.72 13.80
## 176 332.28 58.68 50.16 32.40
## 177 298.08 36.24 24.36 24.24
## 178 204.24 9.36 42.24 14.04
## 179 332.04 2.76 28.44 14.16
## 180 198.72 12.00 21.12 15.12
## 181 187.92 3.12 9.96 12.60
## 182 262.20 6.48 32.88 14.64
## 183 67.44 6.84 35.64 10.44
## 184 345.12 51.60 86.16 31.44
## 185 304.56 25.56 36.00 21.12
## 186 246.00 54.12 23.52 27.12
## 187 167.40 2.52 31.92 12.36
## 188 229.32 34.44 21.84 20.76
## 189 343.20 16.68 4.44 19.08
## 190 22.44 14.52 28.08 8.04
## 191 47.40 49.32 6.96 12.96
## 192 90.60 12.96 7.20 11.88
## 193 20.64 4.92 37.92 7.08
## 194 200.16 50.40 4.32 23.52
## 195 179.64 42.72 7.20 20.76
## 196 45.84 4.44 16.56 9.12
## 197 113.04 5.88 9.72 11.64
## 198 212.40 11.16 7.68 15.36
## 199 340.32 50.40 79.44 30.60
## 200 278.52 10.32 10.44 16.08
## Exploracion de las ventas (sales)
# Univariate summary of sales: mean and standard deviation in a one-row table.
promedio <- mean(marketing[["sales"]])    # mean
desviacion <- sd(marketing[["sales"]])    # standard deviation
data.frame(promedio = promedio, desviacion = desviacion)
## promedio desviacion
## 1 16.827 6.260948
# Histogram of sales. The bin count is pinned to 30, the value ggplot2 falls
# back to when none is given, so the figure is unchanged but rendering no
# longer emits the "stat_bin() using bins = 30" message.
g1 <- ggplot(data = marketing, mapping = aes(x = sales)) +
  geom_histogram(bins = 30, fill = "blue") +
  theme_bw()
g1            # static ggplot2 rendering
ggplotly(g1)  # interactive plotly rendering of the same histogram
## Se observa que las ventas promedio en los datos historicos son de aproximadamente 16.8 millones de dolares, con una desviacion estandar de 6.3 millones, lo que indica que las ventas varian de manera significativa. Suponemos que esas variaciones pueden estar relacionadas con la inversion en mercadeo; por ello, el siguiente paso es un analisis exploratorio bivariado.
## Exploracion Bivariada - (Relacion entre ventas con mercadeo)
# Scatter plot of sales against newspaper spend with a smoothed trend line.
# The smoother's method and formula are spelled out (loess on y ~ x is what
# geom_smooth() selects automatically for data sets under 1,000 rows), so the
# curve is unchanged and no method-selection message is emitted.
g2 <- ggplot(data = marketing, mapping = aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  theme_bw()
g2  # sales does not appear to be strongly related to this marketing variable
# Correlation coefficient: values near +1 mean a strong direct relationship,
# values near -1 a strong inverse one, and values near 0 little relationship.
cor(marketing$newspaper, marketing$sales)
## [1] 0.228299
No se evidencia una relacion clara entre la inversion en mercadeo por medio de periodico y las ventas: el coeficiente de correlacion es muy bajo (aproximadamente 0.23).
# Scatter plot of sales against Facebook spend with a smoothed trend line.
# The smoother's method and formula are spelled out (loess on y ~ x is what
# geom_smooth() selects automatically for data sets under 1,000 rows), so the
# curve is unchanged and no method-selection message is emitted.
g3 <- ggplot(data = marketing, mapping = aes(x = facebook, y = sales)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  theme_bw()
g3
# Correlation coefficient between Facebook spend and sales.
cor(marketing$facebook, marketing$sales)
## [1] 0.5762226
Por otro lado, se observa una relacion positiva (directa) entre la inversion en publicidad en Facebook y las ventas; esta relacion tiene una fuerza media, ya que el coeficiente de correlacion es de aproximadamente 0.58.
# Scatter plot of sales against YouTube spend with a smoothed trend line.
# The smoother's method and formula are spelled out (loess on y ~ x is what
# geom_smooth() selects automatically for data sets under 1,000 rows), so the
# curve is unchanged and no method-selection message is emitted.
g4 <- ggplot(data = marketing, mapping = aes(x = youtube, y = sales)) +
  geom_point() +
  geom_smooth(method = "loess", formula = y ~ x) +
  theme_bw()
g4
# Correlation coefficient between YouTube spend and sales.
cor(marketing$youtube, marketing$sales)
## [1] 0.7822244
Se observa que la relacion entre la inversion en mercadeo en YouTube y las ventas es mas fuerte que las demas, con un coeficiente de correlacion de aproximadamente 0.78; es decir, tiene mas relevancia en las ventas que Facebook y el periodico.
A continuacion vamos a estimar un modelo de regresion lineal simple de las ventas en funcion de la inversion en mercadeo en YouTube.
# Simple linear regression of sales on YouTube spend, fitted on all rows.
mod_you <- lm(formula = sales ~ youtube, data = marketing)
print(mod_you)  # show the call and the estimated coefficients
##
## Call:
## lm(formula = sales ~ youtube, data = marketing)
##
## Coefficients:
## (Intercept) youtube
## 8.43911 0.04754
##enlace para simbolos: https://www.codecogs.com/latex/eqneditor.php?lang=es-es
El modelo estimado es \(sales = 8.43911 + 0.04754 \cdot youtube\), en donde \(\beta_0 = 8.43911\) y \(\beta_1 = 0.04754\). Podemos interpretar lo siguiente: las ventas promedio esperadas cuando no se invierte en YouTube son de 8.43911 millones de dolares, y por cada 1000 dolares adicionales de inversion en mercadeo en YouTube se espera un retorno en ventas de 0.04754 millones de dolares.
##interpretacion del summary del modelo
# Full regression summary for mod_you (residuals, coefficient table, fit statistics).
summary(object = mod_you)
##
## Call:
## lm(formula = sales ~ youtube, data = marketing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0632 -2.3454 -0.2295 2.4805 8.6548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.439112 0.549412 15.36 <2e-16 ***
## youtube 0.047537 0.002691 17.67 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.91 on 198 degrees of freedom
## Multiple R-squared: 0.6119, Adjusted R-squared: 0.6099
## F-statistic: 312.1 on 1 and 198 DF, p-value: < 2.2e-16
# Expected sales at youtube = 65, with a 95% confidence interval for the mean response.
predict(
  mod_you,
  newdata = data.frame(youtube = 65),
  interval = "confidence",
  level = 0.95
)
## fit lwr upr
## 1 11.52899 10.72462 12.33337
El promedio esperado de ventas para una eventual inversion en YouTube de 65 mil dolares seria de 11.529 millones de dolares. El intervalo de confianza indica que el promedio de ventas estaria entre 10.72 y 12.33 millones de dolares, con un 95% de confianza.
## Paso 1: Segmentar los datos
# Randomly split the rows into a modelling set (80%) and a validation set (20%).
# A fixed seed makes the split, and every number derived from it, reproducible
# from run to run. The sizes are derived from nrow(marketing) rather than
# hard-coded; for the 200-row data set this still yields 160 / 40 rows.
# NOTE: the outputs recorded further down in this file came from an unseeded
# run, so re-running will print different (but now stable) values.
set.seed(123)
id_modelar <- sample(nrow(marketing), size = floor(0.8 * nrow(marketing)))
marketing_modelar <- marketing[id_modelar, ]
marketing_validar <- marketing[-id_modelar, ]
## Paso 2: estimar el modelo set de modelar
# Refit the same sales ~ youtube regression using only the modelling subset.
mod_you_modelar <- lm(formula = sales ~ youtube, data = marketing_modelar)
print(mod_you_modelar)  # show the call and the estimated coefficients
##
## Call:
## lm(formula = sales ~ youtube, data = marketing_modelar)
##
## Coefficients:
## (Intercept) youtube
## 8.4405 0.0472
## Paso 3 Predecir set de validacion
# Predict sales for the held-out rows from their YouTube spend. The new data is
# rebuilt from the bare column so the predictions are labelled 1..n rather than
# with the original row ids.
sales_pred <- predict(
  mod_you_modelar,
  newdata = data.frame(youtube = marketing_validar$youtube)
)
## Paso 4 - Comparar ventas del modelo y reales
# Put observed and predicted sales side by side with the prediction error
# (observed minus predicted) for each validation row.
sales_real <- marketing_validar[["sales"]]
error <- sales_real - sales_pred
res <- data.frame(
  sales_real = sales_real,
  sales_pred = sales_pred,
  error = error
)
print(res)
## sales_real sales_pred error
## 1 12.48 10.960932 1.5190680
## 2 15.48 18.680798 -3.2007981
## 3 5.76 8.927600 -3.1675997
## 4 10.32 12.184330 -1.8643297
## 5 13.56 12.359910 1.2000901
## 6 14.28 14.835025 -0.5550248
## 7 11.40 13.860838 -2.4608377
## 8 17.64 12.671423 4.9685768
## 9 24.84 25.069653 -0.2296529
## 10 15.48 20.159070 -4.6790704
## 11 27.84 22.028150 5.8118498
## 12 12.84 14.127040 -1.2870400
## 13 27.12 20.697139 6.4228612
## 14 28.44 19.705960 8.7340399
## 15 6.60 8.853969 -2.2539692
## 16 15.84 16.154708 -0.3147084
## 17 11.16 12.348582 -1.1885822
## 18 22.68 21.886553 0.7934467
## 19 13.56 12.705407 0.8545935
## 20 15.48 13.441711 2.0382893
## 21 13.80 14.523511 -0.7235114
## 22 14.04 21.036972 -6.9969715
## 23 17.64 19.082933 -1.4429335
## 24 24.84 21.931864 2.9081357
## 25 8.64 9.856476 -1.2164757
## 26 15.12 12.694079 2.4259213
## 27 19.08 15.560001 3.5199988
## 28 8.40 9.505315 -1.1053152
## 29 11.64 11.816178 -0.1761776
## 30 13.92 11.176159 2.7438406
## 31 22.80 18.142730 4.6572703
## 32 18.36 13.758888 4.6011121
## 33 15.48 15.899834 -0.4198339
## 34 14.28 21.722301 -7.4423008
## 35 12.60 17.310140 -4.7101396
## 36 19.08 24.639198 -5.5591981
## 37 8.04 9.499651 -1.4596514
## 38 11.88 12.716734 -0.8367343
## 39 23.52 17.887855 5.6321448
## 40 30.60 24.503265 6.0967349
## Paso 5: Calcular Indicador de ajuste de la prediccion
# Mean absolute error of the validation predictions.
MAE <- mean(abs(error))
print(MAE)
## [1] 2.955471