# Univariate exploration: interactive histogram of the sales distribution.
# No bin count is passed, so geom_histogram() uses its default binning.
g1 <- ggplot(data = marketing, mapping = aes(x = sales)) +
  geom_histogram(fill = "purple") +
  theme_bw()
ggplotly(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bivariate exploration: relationship between sales and each marketing channel.
# Newspaper vs. sales: scatter plot with a smoothed trend line.
g2 <- ggplot(data = marketing, mapping = aes(x = newspaper, y = sales)) +
  geom_point() +
  theme_bw() +
  geom_smooth()
ggplotly(g2)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Correlation between the newspaper and sales columns (cor() default method).
with(marketing, cor(newspaper, sales))
## [1] 0.228299
# Facebook vs. sales: scatter plot with a smoothed trend line.
g3 <- ggplot(data = marketing, mapping = aes(x = facebook, y = sales)) +
  geom_point() +
  theme_bw() +
  geom_smooth()
ggplotly(g3)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Correlation between the facebook and sales columns (cor() default method).
with(marketing, cor(facebook, sales))
## [1] 0.5762226
# YouTube vs. sales: scatter plot with a smoothed trend line.
g4 <- ggplot(data = marketing, mapping = aes(x = youtube, y = sales)) +
  geom_point() +
  theme_bw() +
  geom_smooth()
ggplotly(g4)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Correlation between the youtube and sales columns (cor() default method).
with(marketing, cor(youtube, sales))
## [1] 0.7822244
# Model estimation: simple linear regression of sales on the youtube column.
mod_you <- lm(formula = sales ~ youtube, data = marketing)
# Show the call and the fitted coefficients.
print(mod_you)
##
## Call:
## lm(formula = sales ~ youtube, data = marketing)
##
## Coefficients:
## (Intercept) youtube
## 8.43911 0.04754
# Model summary for interpretation: residual quantiles, coefficient table
# with standard errors and p-values, R-squared and the overall F-test.
print(summary(mod_you))
##
## Call:
## lm(formula = sales ~ youtube, data = marketing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0632 -2.3454 -0.2295 2.4805 8.6548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.439112 0.549412 15.36 <2e-16 ***
## youtube 0.047537 0.002691 17.67 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.91 on 198 degrees of freedom
## Multiple R-squared: 0.6119, Adjusted R-squared: 0.6099
## F-statistic: 312.1 on 1 and 198 DF, p-value: < 2.2e-16
# Prediction with the simple model.
# Estimate sales for a month in which 65 million dollars are invested in
# YouTube marketing, together with a 95% confidence interval.
# NOTE(review): interval = "confidence" bounds the mean response; if the goal
# is the plausible range for one individual month, interval = "prediction"
# may be what is intended - confirm.
predict(
  mod_you,
  newdata = data.frame(youtube = 65),
  interval = "confidence",
  level = 0.95
)
## fit lwr upr
## 1 11.52899 10.72462 12.33337
# Multiple regression: sales explained by the youtube and facebook columns.
mod_yf <- lm(formula = sales ~ youtube + facebook, data = marketing)
# Show the call and the fitted coefficients.
print(mod_yf)
##
## Call:
## lm(formula = sales ~ youtube + facebook, data = marketing)
##
## Coefficients:
## (Intercept) youtube facebook
## 3.50532 0.04575 0.18799
# Full summary of the two-predictor model (coefficient table, R-squared, F-test).
print(summary(mod_yf))
##
## Call:
## lm(formula = sales ~ youtube + facebook, data = marketing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5572 -1.0502 0.2906 1.4049 3.3994
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.50532 0.35339 9.919 <2e-16 ***
## youtube 0.04575 0.00139 32.909 <2e-16 ***
## facebook 0.18799 0.00804 23.382 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.018 on 197 degrees of freedom
## Multiple R-squared: 0.8972, Adjusted R-squared: 0.8962
## F-statistic: 859.6 on 2 and 197 DF, p-value: < 2.2e-16
# Fitted sales and 95% confidence interval of the mean response when both
# youtube and facebook are set to 65.
predict(
  mod_yf,
  newdata = data.frame(youtube = 65, facebook = 65),
  interval = "confidence",
  level = 0.95
)
## fit lwr upr
## 1 18.69901 17.96556 19.43246
# Hold-out validation of the simple YouTube model.
# Step 1: split the data into an estimation set (80%) and a validation set (20%).
# Fixing the seed makes the random split, and therefore the MAE printed at the
# end, reproducible; without it every run reports a different value.
set.seed(123)
# Sizes are derived from the data instead of the literals 1:200 and 160, so the
# split stays 80/20 if the number of rows changes (200 rows still gives 160).
id_modelar <- sample.int(nrow(marketing), size = round(0.8 * nrow(marketing)))
marketing_modelar <- marketing[id_modelar, ]
marketing_validar <- marketing[-id_modelar, ]
# Step 2: fit the model on the estimation set only.
mod_you_modelar <- lm(sales ~ youtube, data = marketing_modelar)
# Step 3: predict sales for the validation rows.
sales_pred <- predict(
  mod_you_modelar,
  newdata = data.frame(youtube = marketing_validar$youtube)
)
# Step 4: compare predicted sales against the observed ones.
sales_real <- marketing_validar$sales
error <- sales_real - sales_pred
res <- data.frame(sales_real, sales_pred, error)
# Step 5: prediction-quality indicator.
MAE <- mean(abs(error)) # mean absolute error
MAE
## [1] 3.513285
# El modelo de regresión lineal simple indica que, con una inversión de 65 millones de dólares en YouTube, las ventas medias estimadas son de 11,53 millones, con un intervalo de confianza del 95 % entre 10,72 y 12,33 millones. Además, muestra que las ventas estimadas sin inversión en publicidad en YouTube (el intercepto) son de 8,44 millones. Por cada unidad adicional invertida en esta plataforma, las ventas aumentarían en promedio 0,048 unidades (coeficiente estimado: 0,0475).