library(pacman)
library(readxl)
p_load("base64enc", "htmltools", "mime", "xfun", "prettydoc", "readr", "ggplot2", "tidyr", "plotly", "DT")
# Read the Excel workbook into `td`; with no `sheet` argument, read_excel()
# uses the first sheet.
td <- read_excel(path = "twitchdiscord.xlsx")
La correlación aquí se explica porque Discord y Twitch comparten usuarios, de modo que, si una persona busca “Twitch”, es probable que termine buscando también “Discord”.
# Explore the data: an interactive table, a scatterplot matrix of the
# columns, and their correlation matrix.
datatable(data = td)
pairs(x = td)
cor(x = td)
## twitch discord
## twitch 1.0000000 0.8894734
## discord 0.8894734 1.0000000
# Fit a simple linear regression with twitch as the response and discord as
# the single predictor, then print the coefficient table and fit statistics.
regresion <- lm(formula = twitch ~ discord, data = td)
summary(object = regresion)
##
## Call:
## lm(formula = twitch ~ discord, data = td)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.522 -5.127 -1.063 6.182 21.372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.98283 3.01259 3.646 0.000635 ***
## discord 0.69824 0.05073 13.763 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.397 on 50 degrees of freedom
## Multiple R-squared: 0.7912, Adjusted R-squared: 0.787
## F-statistic: 189.4 on 1 and 50 DF, p-value: < 2.2e-16
# Scatterplot of twitch against discord, overlaid with the fitted line.
plot(twitch ~ discord, data = td, ylab = "Twitch", xlab = "Discord")
abline(reg = regresion)

# Predict twitch for each integer discord value from 60 to 80.
nuevas.busquedas <- data.frame(discord = 60:80)
predict(object = regresion, newdata = nuevas.busquedas)
## 1 2 3 4 5 6 7 8
## 52.87707 53.57531 54.27355 54.97178 55.67002 56.36826 57.06650 57.76473
## 9 10 11 12 13 14 15 16
## 58.46297 59.16121 59.85944 60.55768 61.25592 61.95416 62.65239 63.35063
## 17 18 19 20 21
## 64.04887 64.74711 65.44534 66.14358 66.84182