require(ggplot2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
require(plotly)
## Loading required package: plotly
## Warning: package 'plotly' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
require(datarium)
## Loading required package: datarium
## Warning: package 'datarium' was built under R version 4.3.3
# Load the `marketing` data set into the workspace and preview its first
# six rows.
data(list = "marketing")
head(marketing, n = 6L)
##   youtube facebook newspaper sales
## 1  276.12    45.36     83.04 26.52
## 2   53.40    47.16     54.12 12.48
## 3   20.64    55.08     83.16 11.16
## 4  181.80    49.56     70.20 22.20
## 5  216.96    12.96     70.08 15.48
## 6   10.44    58.68     90.00  8.64
# Centre and spread of the response variable (sales).
promedio <- mean(marketing[["sales"]])
desviación <- sd(marketing[["sales"]])

# Show both statistics side by side as a one-row table.
data.frame(promedio = promedio, desviación = desviación)
##   promedio desviación
## 1   16.827   6.260948
# Histogram of sales (default binning), shown as an interactive plotly widget.
g1 <- ggplot(marketing, aes(x = sales)) +
  geom_histogram(fill = "blue") +
  theme_bw()
ggplotly(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Sales vs. newspaper spend: scatterplot with the default smoother, followed
# by the correlation between the two variables.
g2 <- ggplot(marketing, aes(x = newspaper, y = sales)) +
  geom_point() +
  geom_smooth() +
  theme_bw()
ggplotly(g2)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
with(marketing, cor(sales, newspaper))
## [1] 0.228299
# Sales vs. Facebook spend: scatterplot with the default smoother, followed
# by the correlation between the two variables.
g3 <- ggplot(marketing, aes(x = facebook, y = sales)) +
  geom_point() +
  geom_smooth() +
  theme_bw()
ggplotly(g3)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
with(marketing, cor(sales, facebook))
## [1] 0.5762226
# Sales vs. YouTube spend: scatterplot with the default smoother, followed
# by the correlation between the two variables.
g4 <- ggplot(marketing, aes(x = youtube, y = sales)) +
  geom_point() +
  geom_smooth() +
  theme_bw()
ggplotly(g4)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
with(marketing, cor(sales, youtube))
## [1] 0.7822244
# Simple linear regression of sales on YouTube spend, fitted on the full
# data set. Print the coefficients first, then the full summary.
mod_you <- lm(formula = sales ~ youtube, data = marketing)
mod_you
## 
## Call:
## lm(formula = sales ~ youtube, data = marketing)
## 
## Coefficients:
## (Intercept)      youtube  
##     8.43911      0.04754
summary(object = mod_you)
## 
## Call:
## lm(formula = sales ~ youtube, data = marketing)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0632  -2.3454  -0.2295   2.4805   8.6548 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.439112   0.549412   15.36   <2e-16 ***
## youtube     0.047537   0.002691   17.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.91 on 198 degrees of freedom
## Multiple R-squared:  0.6119, Adjusted R-squared:  0.6099 
## F-statistic: 312.1 on 1 and 198 DF,  p-value: < 2.2e-16
# Simple linear regression of sales on Facebook spend, fitted on the full
# data set. Print the coefficients first, then the full summary.
mod_face <- lm(formula = sales ~ facebook, data = marketing)
mod_face
## 
## Call:
## lm(formula = sales ~ facebook, data = marketing)
## 
## Coefficients:
## (Intercept)     facebook  
##     11.1740       0.2025
summary(object = mod_face)
## 
## Call:
## lm(formula = sales ~ facebook, data = marketing)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.8766  -2.5589   0.9248   3.3330   9.8173 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.17397    0.67548  16.542   <2e-16 ***
## facebook     0.20250    0.02041   9.921   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.13 on 198 degrees of freedom
## Multiple R-squared:  0.332,  Adjusted R-squared:  0.3287 
## F-statistic: 98.42 on 1 and 198 DF,  p-value: < 2.2e-16
# Simple linear regression of sales on newspaper spend, fitted on the full
# data set. Print the coefficients first, then the full summary.
mod_new <- lm(formula = sales ~ newspaper, data = marketing)
mod_new
## 
## Call:
## lm(formula = sales ~ newspaper, data = marketing)
## 
## Coefficients:
## (Intercept)    newspaper  
##    14.82169      0.05469
summary(object = mod_new)
## 
## Call:
## lm(formula = sales ~ newspaper, data = marketing)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.473  -4.065  -1.007   4.207  15.330 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.82169    0.74570   19.88  < 2e-16 ***
## newspaper    0.05469    0.01658    3.30  0.00115 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.111 on 198 degrees of freedom
## Multiple R-squared:  0.05212,    Adjusted R-squared:  0.04733 
## F-statistic: 10.89 on 1 and 198 DF,  p-value: 0.001148
# Fitted sales at a spend of 65 on each channel, with 95% confidence bounds.
# NOTE(review): interval = "confidence" bounds the mean response; if a range
# for a single future period is what is wanted, "prediction" may be the
# intended option -- confirm.
predict(
  mod_you,
  newdata = data.frame(youtube = 65),
  interval = "confidence",
  level = 0.95
)
##        fit      lwr      upr
## 1 11.52899 10.72462 12.33337
predict(
  mod_face,
  newdata = data.frame(facebook = 65),
  interval = "confidence",
  level = 0.95
)
##        fit      lwr     upr
## 1 24.33619 22.68098 25.9914
## Step 1 - Split the data (80% modelling / 20% validation)
# Fix the RNG seed so that the random split, and therefore the MAE reported
# below, can be reproduced from run to run.
set.seed(123)
# Derive the split from the actual number of rows instead of hard-coding
# 200 and 160 (for 200 rows this still selects 160 of them).
id_modelar <- sample(
  seq_len(nrow(marketing)),
  size = round(0.8 * nrow(marketing))
)
marketing_modelar <- marketing[id_modelar, ]
marketing_validar <- marketing[-id_modelar, ]

## Step 2 - Fit the model on the modelling set
mod_you_modelar <- lm(sales ~ youtube, data = marketing_modelar)

## Step 3 - Predict the validation set
sales_pred <- predict(
  mod_you_modelar,
  newdata = data.frame(youtube = marketing_validar$youtube)
)

## Step 4 - Compare predicted and actual sales
sales_real <- marketing_validar$sales
error <- sales_real - sales_pred
res <- data.frame(sales_real, sales_pred, error)

## Step 5 - Compute the prediction quality indicator
MAE <- mean(abs(error)) # Mean Absolute Error
MAE
## [1] 3.175392
## Step 1 - Split the data (80% modelling / 20% validation)
# Fix the RNG seed so that the random split, and therefore the MAE reported
# below, can be reproduced from run to run. Any other model validated with
# this same seed and the same sample() call is scored on the same rows,
# which keeps the MAE values comparable.
set.seed(123)
# Derive the split from the actual number of rows instead of hard-coding
# 200 and 160 (for 200 rows this still selects 160 of them).
id_modelar <- sample(
  seq_len(nrow(marketing)),
  size = round(0.8 * nrow(marketing))
)
marketing_modelar <- marketing[id_modelar, ]
marketing_validar <- marketing[-id_modelar, ]

## Step 2 - Fit the model on the modelling set
mod_face_modelar <- lm(sales ~ facebook, data = marketing_modelar)

## Step 3 - Predict the validation set
sales_pred <- predict(
  mod_face_modelar,
  newdata = data.frame(facebook = marketing_validar$facebook)
)

## Step 4 - Compare predicted and actual sales
sales_real <- marketing_validar$sales
error <- sales_real - sales_pred
res <- data.frame(sales_real, sales_pred, error)

## Step 5 - Compute the prediction quality indicator
MAE <- mean(abs(error)) # Mean Absolute Error
MAE
## [1] 4.312971

Conclusión: El modelo de regresión lineal simple más adecuado para predecir las ventas con base en la inversión en mercadeo vía YouTube es: Ventas ≈ 8.44 + 0.047 × Inversión en YouTube. Esto significa que, en caso de que no haya inversión, las ventas esperadas serán en promedio de 8.44 millones, y que por cada millón adicional invertido en publicidad en YouTube se esperan 0.047 millones más en ventas. Este modelo cuenta con un R^2 de 0.612, es decir, explica cerca del 61% de la variabilidad de las ventas, de lo cual se concluye que es un buen modelo.

De acuerdo con este modelo, si en el próximo periodo se invierten 65 millones en publicidad en YouTube, las ventas predichas serán de 11.53 millones, con un intervalo de confianza del 95% de 10.72 a 12.33 millones.