#Cargue de librerias
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(datarium)
## Warning: package 'datarium' was built under R version 4.1.3
# Load the `marketing` data set (datarium is attached just above) and
# preview its first six rows.
data(list = "marketing")
head(marketing, n = 6L)
##   youtube facebook newspaper sales
## 1  276.12    45.36     83.04 26.52
## 2   53.40    47.16     54.12 12.48
## 3   20.64    55.08     83.16 11.16
## 4  181.80    49.56     70.20 22.20
## 5  216.96    12.96     70.08 15.48
## 6   10.44    58.68     90.00  8.64
# Explore sales: mean and standard deviation of the response variable
sales_values <- marketing[["sales"]]
mean(sales_values)
## [1] 16.827
sd(sales_values)
## [1] 6.260948
# Univariate exploration: interactive histogram of sales.
# `bins = 30` pins the bin count that ggplot2 previously chose implicitly, so
# the plot is unchanged but the "Pick better value with `binwidth`" message
# is no longer emitted.
g1 <- ggplot(data = marketing, mapping = aes(x = sales)) +
  geom_histogram(bins = 30, fill = "blue") +
  theme_bw()
ggplotly(g1)
# Bivariate exploration: sales vs. newspaper investment.
# The smoother's method and formula are stated explicitly (the same values
# ggplot2 reported auto-selecting), so the fit does not depend on implicit
# defaults and no informational message is emitted.
g2 <- ggplot(data = marketing, mapping = aes(x = newspaper, y = sales)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "loess", formula = y ~ x)
ggplotly(g2)
# Linear (Pearson) correlation between sales and newspaper investment
cor(marketing$sales, marketing$newspaper)
## [1] 0.228299
# Bivariate exploration: sales vs. social-media investment (facebook).
# The smoother's method and formula are stated explicitly (the same values
# ggplot2 reported auto-selecting), so the fit does not depend on implicit
# defaults and no informational message is emitted.
g3 <- ggplot(data = marketing, mapping = aes(x = facebook, y = sales)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "loess", formula = y ~ x)
ggplotly(g3)
# Linear (Pearson) correlation between sales and facebook investment
cor(marketing$sales, marketing$facebook)
## [1] 0.5762226
# Bivariate exploration: sales vs. social-media investment (youtube),
# approximated with a straight-line (lm) fit.
# `formula = y ~ x` is stated explicitly (the value ggplot2 reported using),
# so no informational message is emitted.
g4 <- ggplot(data = marketing, mapping = aes(x = youtube, y = sales)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", formula = y ~ x)
ggplotly(g4)
# Linear (Pearson) correlation between sales and youtube investment
cor(marketing$sales, marketing$youtube)
## [1] 0.7822244

Estimación del Modelo Lineal

# Fit a simple linear regression of sales on youtube investment and
# print the model summary.
mod_1 <- lm(formula = sales ~ youtube, data = marketing)
summary(mod_1)
## 
## Call:
## lm(formula = sales ~ youtube, data = marketing)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0632  -2.3454  -0.2295   2.4805   8.6548 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.439112   0.549412   15.36   <2e-16 ***
## youtube     0.047537   0.002691   17.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.91 on 198 degrees of freedom
## Multiple R-squared:  0.6119, Adjusted R-squared:  0.6099 
## F-statistic: 312.1 on 1 and 198 DF,  p-value: < 2.2e-16

Predicciones con el modelo

# Estimate sales for a YouTube investment of 65 thousand dollars,
# together with a 95% confidence interval around the fitted value.
new_budget <- data.frame(youtube = 65)
predict(mod_1, newdata = new_budget, interval = "confidence", level = 0.95)
##        fit      lwr      upr
## 1 11.52899 10.72462 12.33337

Validacion Cruzada

## Step 1 - Split the data into training and validation sets
# A fixed seed makes the random split (and therefore the MAE) reproducible.
# The row count and the 80% training size are derived from the data instead
# of hard-coding 200 rows / 160 draws.
set.seed(123)
n_obs <- nrow(marketing)
n_train <- floor(0.8 * n_obs)
id_modelar <- sample(seq_len(n_obs), size = n_train)
marketing_modelar <- marketing[id_modelar, ]
marketing_validar <- marketing[-id_modelar, ]

## Step 2 - Fit the model on the training set
mod_1_modelar <- lm(sales ~ youtube, data = marketing_modelar)

## Step 3 - Predict the validation set
# The validation data frame is passed directly as `newdata`.
sales_pred <- predict(mod_1_modelar, newdata = marketing_validar)

## Step 4 - Compare predicted and actual sales
sales_real <- marketing_validar$sales
error <- sales_real - sales_pred
res <- data.frame(sales_real, sales_pred, error)

## Step 5 - Compute the prediction-quality metric (mean absolute error)
MAE <- mean(abs(error))
MAE
## [1] 3.649731
## (value recorded from an earlier unseeded run; it will differ under the
## fixed seed above)