require(ggplot2)
require(plotly)
require(datarium)

# Load the `marketing` data set (provided by the datarium package attached above)
# and preview its first six rows.
data(marketing)
head(marketing, n = 6L)
##   youtube facebook newspaper sales
## 1  276.12    45.36     83.04 26.52
## 2   53.40    47.16     54.12 12.48
## 3   20.64    55.08     83.16 11.16
## 4  181.80    49.56     70.20 22.20
## 5  216.96    12.96     70.08 15.48
## 6   10.44    58.68     90.00  8.64
# Print the full marketing data frame; the recorded output below lists 200 rows
# with the columns youtube, facebook, newspaper and sales.
marketing
##     youtube facebook newspaper sales
## 1    276.12    45.36     83.04 26.52
## 2     53.40    47.16     54.12 12.48
## 3     20.64    55.08     83.16 11.16
## 4    181.80    49.56     70.20 22.20
## 5    216.96    12.96     70.08 15.48
## 6     10.44    58.68     90.00  8.64
## 7     69.00    39.36     28.20 14.16
## 8    144.24    23.52     13.92 15.84
## 9     10.32     2.52      1.20  5.76
## 10   239.76     3.12     25.44 12.72
## 11    79.32     6.96     29.04 10.32
## 12   257.64    28.80      4.80 20.88
## 13    28.56    42.12     79.08 11.04
## 14   117.00     9.12      8.64 11.64
## 15   244.92    39.48     55.20 22.80
## 16   234.48    57.24     63.48 26.88
## 17    81.36    43.92    136.80 15.00
## 18   337.68    47.52     66.96 29.28
## 19    83.04    24.60     21.96 13.56
## 20   176.76    28.68     22.92 17.52
## 21   262.08    33.24     64.08 21.60
## 22   284.88     6.12     28.20 15.00
## 23    15.84    19.08     59.52  6.72
## 24   273.96    20.28     31.44 18.60
## 25    74.76    15.12     21.96 11.64
## 26   315.48     4.20     23.40 14.40
## 27   171.48    35.16     15.12 18.00
## 28   288.12    20.04     27.48 19.08
## 29   298.56    32.52     27.48 22.68
## 30    84.72    19.20     48.96 12.60
## 31   351.48    33.96     51.84 25.68
## 32   135.48    20.88     46.32 14.28
## 33   116.64     1.80     36.00 11.52
## 34   318.72    24.00      0.36 20.88
## 35   114.84     1.68      8.88 11.40
## 36   348.84     4.92     10.20 15.36
## 37   320.28    52.56      6.00 30.48
## 38    89.64    59.28     54.84 17.64
## 39    51.72    32.04     42.12 12.12
## 40   273.60    45.24     38.40 25.80
## 41   243.00    26.76     37.92 19.92
## 42   212.40    40.08     46.44 20.52
## 43   352.32    33.24      2.16 24.84
## 44   248.28    10.08     31.68 15.48
## 45    30.12    30.84     51.96 10.20
## 46   210.12    27.00     37.80 17.88
## 47   107.64    11.88     42.84 12.72
## 48   287.88    49.80     22.20 27.84
## 49   272.64    18.96     59.88 17.76
## 50    80.28    14.04     44.16 11.64
## 51   239.76     3.72     41.52 13.68
## 52   120.48    11.52      4.32 12.84
## 53   259.68    50.04     47.52 27.12
## 54   219.12    55.44     70.44 25.44
## 55   315.24    34.56     19.08 24.24
## 56   238.68    59.28     72.00 28.44
## 57     8.76    33.72     49.68  6.60
## 58   163.44    23.04     19.92 15.84
## 59   252.96    59.52     45.24 28.56
## 60   252.84    35.40     11.16 22.08
## 61    64.20     2.40     25.68  9.72
## 62   313.56    51.24     65.64 29.04
## 63   287.16    18.60     32.76 18.84
## 64   123.24    35.52     10.08 16.80
## 65   157.32    51.36     34.68 21.60
## 66    82.80    11.16      1.08 11.16
## 67    37.80    29.52      2.64 11.40
## 68   167.16    17.40     12.24 16.08
## 69   284.88    33.00     13.20 22.68
## 70   260.16    52.68     32.64 26.76
## 71   238.92    36.72     46.44 21.96
## 72   131.76    17.16     38.04 14.88
## 73    32.16    39.60     23.16 10.56
## 74   155.28     6.84     37.56 13.20
## 75   256.08    29.52     15.72 20.40
## 76    20.28    52.44    107.28 10.44
## 77    33.00     1.92     24.84  8.28
## 78   144.60    34.20     17.04 17.04
## 79     6.48    35.88     11.28  6.36
## 80   139.20     9.24     27.72 13.20
## 81    91.68    32.04     26.76 14.16
## 82   287.76     4.92     44.28 14.76
## 83    90.36    24.36     39.00 13.56
## 84    82.08    53.40     42.72 16.32
## 85   256.20    51.60     40.56 26.04
## 86   231.84    22.08     78.84 18.24
## 87    91.56    33.00     19.20 14.40
## 88   132.84    48.72     75.84 19.20
## 89   105.96    30.60     88.08 15.48
## 90   131.76    57.36     61.68 20.04
## 91   161.16     5.88     11.16 13.44
## 92    34.32     1.80     39.60  8.76
## 93   261.24    40.20     70.80 23.28
## 94   301.08    43.80     86.76 26.64
## 95   128.88    16.80     13.08 13.80
## 96   195.96    37.92     63.48 20.28
## 97   237.12     4.20      7.08 14.04
## 98   221.88    25.20     26.40 18.60
## 99   347.64    50.76     61.44 30.48
## 100  162.24    50.04     55.08 20.64
## 101  266.88     5.16     59.76 14.04
## 102  355.68    43.56    121.08 28.56
## 103  336.24    12.12     25.68 17.76
## 104  225.48    20.64     21.48 17.64
## 105  285.84    41.16      6.36 24.84
## 106  165.48    55.68     70.80 23.04
## 107   30.00    13.20     35.64  8.64
## 108  108.48     0.36     27.84 10.44
## 109   15.72     0.48     30.72  6.36
## 110  306.48    32.28      6.60 23.76
## 111  270.96     9.84     67.80 16.08
## 112  290.04    45.60     27.84 26.16
## 113  210.84    18.48      2.88 16.92
## 114  251.52    24.72     12.84 19.08
## 115   93.84    56.16     41.40 17.52
## 116   90.12    42.00     63.24 15.12
## 117  167.04    17.16     30.72 14.64
## 118   91.68     0.96     17.76 11.28
## 119  150.84    44.28     95.04 19.08
## 120   23.28    19.20     26.76  7.92
## 121  169.56    32.16     55.44 18.60
## 122   22.56    26.04     60.48  8.40
## 123  268.80     2.88     18.72 13.92
## 124  147.72    41.52     14.88 18.24
## 125  275.40    38.76     89.04 23.64
## 126  104.64    14.16     31.08 12.72
## 127    9.36    46.68     60.72  7.92
## 128   96.24     0.00     11.04 10.56
## 129  264.36    58.80      3.84 29.64
## 130   71.52    14.40     51.72 11.64
## 131    0.84    47.52     10.44  1.92
## 132  318.24     3.48     51.60 15.24
## 133   10.08    32.64      2.52  6.84
## 134  263.76    40.20     54.12 23.52
## 135   44.28    46.32     78.72 12.96
## 136   57.96    56.40     10.20 13.92
## 137   30.72    46.80     11.16 11.40
## 138  328.44    34.68     71.64 24.96
## 139   51.60    31.08     24.60 11.52
## 140  221.88    52.68      2.04 24.84
## 141   88.08    20.40     15.48 13.08
## 142  232.44    42.48     90.72 23.04
## 143  264.60    39.84     45.48 24.12
## 144  125.52     6.84     41.28 12.48
## 145  115.44    17.76     46.68 13.68
## 146  168.36     2.28     10.80 12.36
## 147  288.12     8.76     10.44 15.84
## 148  291.84    58.80     53.16 30.48
## 149   45.60    48.36     14.28 13.08
## 150   53.64    30.96     24.72 12.12
## 151  336.84    16.68     44.40 19.32
## 152  145.20    10.08     58.44 13.92
## 153  237.12    27.96     17.04 19.92
## 154  205.56    47.64     45.24 22.80
## 155  225.36    25.32     11.40 18.72
## 156    4.92    13.92      6.84  3.84
## 157  112.68    52.20     60.60 18.36
## 158  179.76     1.56     29.16 12.12
## 159   14.04    44.28     54.24  8.76
## 160  158.04    22.08     41.52 15.48
## 161  207.00    21.72     36.84 17.28
## 162  102.84    42.96     59.16 15.96
## 163  226.08    21.72     30.72 17.88
## 164  196.20    44.16      8.88 21.60
## 165  140.64    17.64      6.48 14.28
## 166  281.40     4.08    101.76 14.28
## 167   21.48    45.12     25.92  9.60
## 168  248.16     6.24     23.28 14.64
## 169  258.48    28.32     69.12 20.52
## 170  341.16    12.72      7.68 18.00
## 171   60.00    13.92     22.08 10.08
## 172  197.40    25.08     56.88 17.40
## 173   23.52    24.12     20.40  9.12
## 174  202.08     8.52     15.36 14.04
## 175  266.88     4.08     15.72 13.80
## 176  332.28    58.68     50.16 32.40
## 177  298.08    36.24     24.36 24.24
## 178  204.24     9.36     42.24 14.04
## 179  332.04     2.76     28.44 14.16
## 180  198.72    12.00     21.12 15.12
## 181  187.92     3.12      9.96 12.60
## 182  262.20     6.48     32.88 14.64
## 183   67.44     6.84     35.64 10.44
## 184  345.12    51.60     86.16 31.44
## 185  304.56    25.56     36.00 21.12
## 186  246.00    54.12     23.52 27.12
## 187  167.40     2.52     31.92 12.36
## 188  229.32    34.44     21.84 20.76
## 189  343.20    16.68      4.44 19.08
## 190   22.44    14.52     28.08  8.04
## 191   47.40    49.32      6.96 12.96
## 192   90.60    12.96      7.20 11.88
## 193   20.64     4.92     37.92  7.08
## 194  200.16    50.40      4.32 23.52
## 195  179.64    42.72      7.20 20.76
## 196   45.84     4.44     16.56  9.12
## 197  113.04     5.88      9.72 11.64
## 198  212.40    11.16      7.68 15.36
## 199  340.32    50.40     79.44 30.60
## 200  278.52    10.32     10.44 16.08
## Exploracion de las ventas (sales)

# Univariate summary of sales: arithmetic mean and sample standard deviation.
promedio <- with(marketing, mean(sales))
desviacion <- with(marketing, sd(sales))

# Show both statistics side by side as a one-row data frame.
data.frame(promedio = promedio, desviacion = desviacion)
##   promedio desviacion
## 1   16.827   6.260948
# Histogram of sales (default binning, blue bars, black-and-white theme).
g1 <- ggplot(marketing, aes(x = sales)) +
  geom_histogram(fill = "blue") +
  theme_bw()
g1

# Same histogram converted to an interactive plotly widget.
ggplotly(g1)
## Se observa que las ventas promedio en los datos historicos son de aproximadamente 16.8 millones de dolares, con una desviacion estandar de 6.3 millones, lo que indica que las ventas varian de manera significativa. Suponemos que esas variaciones pueden estar relacionadas con la inversion en mercadeo; por ello, el siguiente paso es un analisis exploratorio bivariado.


## Exploracion Bivariada - (Relacion entre ventas con mercadeo)

# Scatter plot of sales against newspaper spend, with ggplot2's default smoother.
g2 <- ggplot(marketing, aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth() +
  theme_bw()
g2  # sales do not look strongly related to newspaper spend

# Correlation coefficient: values near +1 indicate a strong direct relationship,
# values near -1 a strong inverse one, and values near 0 little linear relationship.
with(marketing, cor(newspaper, sales))
## [1] 0.228299

Se puede observar que no se evidencia una relacion clara entre la inversion en mercadeo por medio de periodico y las ventas: el coeficiente de correlacion es muy bajo (aproximadamente 0.23).

# Scatter plot of sales against facebook spend, with ggplot2's default smoother.
g3 <- ggplot(marketing, aes(x = facebook, y = sales)) +
  geom_point() +
  geom_smooth() +
  theme_bw()
g3

# Correlation between facebook spend and sales.
with(marketing, cor(facebook, sales))
## [1] 0.5762226

Por otro lado, se observa una relacion positiva directa entre la inversion en publicidad en Facebook y las ventas; esta relacion tiene una fuerza media, ya que el coeficiente de correlacion es de aproximadamente 0.58.

# Scatter plot of sales against youtube spend, with ggplot2's default smoother.
g4 <- ggplot(marketing, aes(x = youtube, y = sales)) +
  geom_point() +
  geom_smooth() +
  theme_bw()
g4

# Correlation between youtube spend and sales.
with(marketing, cor(youtube, sales))
## [1] 0.7822244

Se observa que la relacion entre la inversion en mercadeo en YouTube y las ventas es mas fuerte que las demas, con un coeficiente de correlacion de aproximadamente 0.78; es decir, la inversion en YouTube tiene mas relevancia en las ventas que la inversion en Facebook y en periodico.

Estimacion de Modelo de Regresion Lineal Simple

A continuacion vamos a estimar un modelo de regresion lineal simple de las ventas en funcion de la inversion en mercadeo en YouTube.

# Simple linear regression of sales on youtube spend, fitted on the full data set.
mod_you <- lm(formula = sales ~ youtube, data = marketing)
mod_you
## 
## Call:
## lm(formula = sales ~ youtube, data = marketing)
## 
## Coefficients:
## (Intercept)      youtube  
##     8.43911      0.04754

##enlace para simbolos: https://www.codecogs.com/latex/eqneditor.php?lang=es-es

El modelo estimado es \(sales = 8.43911 + 0.04754 \cdot youtube\), en donde \(\beta_0 = 8.43911\) y \(\beta_1 = 0.04754\). Podemos interpretar lo siguiente: las ventas promedio esperadas cuando no se invierte en YouTube son de 8.43911 millones de dolares, y por cada 1000 dolares adicionales de inversion en mercadeo en YouTube se espera un retorno en ventas de 0.04754 millones de dolares.

## Interpretation of the model summary.
# Prints the call, residual quantiles, coefficient table with standard errors
# and p-values, residual standard error, R-squared and F-statistic for mod_you.
summary(mod_you)
## 
## Call:
## lm(formula = sales ~ youtube, data = marketing)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0632  -2.3454  -0.2295   2.4805   8.6548 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.439112   0.549412   15.36   <2e-16 ***
## youtube     0.047537   0.002691   17.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.91 on 198 degrees of freedom
## Multiple R-squared:  0.6119, Adjusted R-squared:  0.6099 
## F-statistic: 312.1 on 1 and 198 DF,  p-value: < 2.2e-16

PREDECIR EL MODELO

# Expected sales at youtube = 65, with a 95% confidence interval for the mean response.
predict(
  mod_you,
  newdata = data.frame(youtube = 65),
  interval = "confidence",
  level = 0.95
)
##        fit      lwr      upr
## 1 11.52899 10.72462 12.33337

El promedio esperado de ventas para una eventual inversion en YouTube de 65 mil dolares seria de 11.529 millones de dolares. El intervalo de confianza nos indica que el promedio de ventas estaria entre 10.72 y 12.33 millones de dolares con un 95% de confianza.

## Step 1: split the data into a modelling set (80%) and a validation set (20%).
# Fix the RNG seed so the random split, and everything derived from it
# (coefficients, predictions, MAE), is the same on every run. The outputs
# recorded further down came from an unseeded run, so they will differ.
set.seed(123)
n_obs <- nrow(marketing)          # derived from the data instead of hard-coding 200
n_modelar <- round(0.8 * n_obs)   # 160 when the data has 200 rows
id_modelar <- sample(seq_len(n_obs), size = n_modelar)
marketing_modelar <- marketing[id_modelar, ]
# setdiff() instead of negative indexing: marketing[-id_modelar, ] would select
# zero rows (rather than all rows) if id_modelar were ever empty.
marketing_validar <- marketing[setdiff(seq_len(n_obs), id_modelar), ]

## Step 2: fit the model on the modelling set.
mod_you_modelar <- lm(formula = sales ~ youtube, data = marketing_modelar)
mod_you_modelar
## 
## Call:
## lm(formula = sales ~ youtube, data = marketing_modelar)
## 
## Coefficients:
## (Intercept)      youtube  
##      8.4405       0.0472
## Step 3: predict sales for the validation set.
# Only the youtube column is passed, as a fresh one-column data frame.
sales_pred <- predict(
  mod_you_modelar,
  newdata = data.frame(youtube = marketing_validar$youtube)
)

## Step 4: compare predicted sales against actual sales.

sales_real <- marketing_validar$sales
error <- sales_real - sales_pred   # positive when the model under-predicts
res <- data.frame(
  sales_real = sales_real,
  sales_pred = sales_pred,
  error = error
)
res
##    sales_real sales_pred      error
## 1       12.48  10.960932  1.5190680
## 2       15.48  18.680798 -3.2007981
## 3        5.76   8.927600 -3.1675997
## 4       10.32  12.184330 -1.8643297
## 5       13.56  12.359910  1.2000901
## 6       14.28  14.835025 -0.5550248
## 7       11.40  13.860838 -2.4608377
## 8       17.64  12.671423  4.9685768
## 9       24.84  25.069653 -0.2296529
## 10      15.48  20.159070 -4.6790704
## 11      27.84  22.028150  5.8118498
## 12      12.84  14.127040 -1.2870400
## 13      27.12  20.697139  6.4228612
## 14      28.44  19.705960  8.7340399
## 15       6.60   8.853969 -2.2539692
## 16      15.84  16.154708 -0.3147084
## 17      11.16  12.348582 -1.1885822
## 18      22.68  21.886553  0.7934467
## 19      13.56  12.705407  0.8545935
## 20      15.48  13.441711  2.0382893
## 21      13.80  14.523511 -0.7235114
## 22      14.04  21.036972 -6.9969715
## 23      17.64  19.082933 -1.4429335
## 24      24.84  21.931864  2.9081357
## 25       8.64   9.856476 -1.2164757
## 26      15.12  12.694079  2.4259213
## 27      19.08  15.560001  3.5199988
## 28       8.40   9.505315 -1.1053152
## 29      11.64  11.816178 -0.1761776
## 30      13.92  11.176159  2.7438406
## 31      22.80  18.142730  4.6572703
## 32      18.36  13.758888  4.6011121
## 33      15.48  15.899834 -0.4198339
## 34      14.28  21.722301 -7.4423008
## 35      12.60  17.310140 -4.7101396
## 36      19.08  24.639198 -5.5591981
## 37       8.04   9.499651 -1.4596514
## 38      11.88  12.716734 -0.8367343
## 39      23.52  17.887855  5.6321448
## 40      30.60  24.503265  6.0967349
## Step 5: compute a goodness-of-fit indicator for the predictions.
# Mean absolute error: the average magnitude of the prediction errors.
MAE <- mean(abs(error))
MAE
## [1] 2.955471

En la validacion cruzada, el primer paso es segmentar los datos, dejando un 80% aleatorio para estimar el modelo y el 20% restante para validar. Luego se ajusta el modelo con el 80%. Posteriormente predecimos las ventas del 20% y finalmente