# Install any required packages that are not yet available.
# find.package() looks each name up directly on the library paths, which
# avoids building the full installed.packages() matrix just to test for a
# handful of packages.
packages <- c("tidyverse", "broom", "psych")
is_missing <- vapply(
  packages,
  function(p) length(find.package(p, quiet = TRUE)) == 0L,
  logical(1)
)
# Guard the call: install.packages() should only run when something is missing.
if (any(is_missing)) {
  install.packages(packages[is_missing])
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Read Display_data.csv (path is relative to the working directory) and
# print a compact overview of its columns
df <- read.csv(file = "Display_data.csv")
df %>% glimpse()
## Rows: 29
## Columns: 8
## $ spend <dbl> 22.61, 37.28, 55.57, 45.42, 50.22, 33.05, 12.88, 23.87, 5…
## $ clicks <int> 165, 228, 291, 247, 290, 172, 68, 112, 306, 300, 355, 533…
## $ impressions <int> 8672, 11875, 14631, 11709, 14768, 8698, 2924, 5919, 14789…
## $ display <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ transactions <int> 2, 2, 3, 2, 3, 2, 1, 1, 3, 3, 4, 5, 2, 1, 4, 3, 5, 6, 4, …
## $ revenue <dbl> 58.88, 44.92, 141.56, 209.76, 197.68, 204.36, 117.32, 72.…
## $ ctr <dbl> 1.90, 1.92, 1.99, 2.11, 1.96, 1.98, 2.33, 1.89, 2.07, 2.0…
## $ con_rate <dbl> 1.21, 0.88, 1.03, 0.81, 1.03, 1.16, 1.47, 0.89, 0.98, 1.0…
# Summary statistics (n, mean, sd, median, skew, kurtosis, ...) per column
df %>%
  psych::describe()
## vars n mean sd median trimmed mad min max
## spend 1 29 44.22 21.08 39.68 43.84 22.61 1.12 91.28
## clicks 2 29 257.14 129.23 241.00 248.60 97.85 48.00 593.00
## impressions 3 29 11857.59 6886.92 9934.00 11333.96 6265.47 1862.00 29324.00
## display 4 29 0.31 0.47 0.00 0.28 0.00 0.00 1.00
## transactions 5 29 2.97 1.30 3.00 2.92 1.48 1.00 6.00
## revenue 6 29 223.50 132.33 235.16 218.26 113.80 16.16 522.00
## ctr 7 29 2.31 0.48 2.02 2.27 0.12 1.89 3.29
## con_rate 8 29 1.23 0.35 1.13 1.20 0.28 0.81 2.08
## range skew kurtosis se
## spend 90.16 0.29 -0.46 3.91
## clicks 545.00 0.74 0.21 24.00
## impressions 27462.00 0.81 -0.04 1278.87
## display 1.00 0.78 -1.44 0.09
## transactions 5.00 0.25 -0.64 0.24
## revenue 505.84 0.22 -0.68 24.57
## ctr 1.40 0.88 -0.99 0.09
## con_rate 1.27 0.77 -0.63 0.06
# Pairwise correlation matrix of the numeric columns, rounded to 2 decimals.
# select(where(is.numeric)) replaces the superseded select_if() scoped verb.
df %>%
  select(where(is.numeric)) %>%
  cor() %>%
  round(digits = 2)
## spend clicks impressions display transactions revenue ctr
## spend 1.00 0.97 0.94 -0.35 0.85 0.77 -0.32
## clicks 0.97 1.00 0.97 -0.33 0.89 0.79 -0.30
## impressions 0.94 0.97 1.00 -0.53 0.77 0.68 -0.51
## display -0.35 -0.33 -0.53 1.00 0.08 0.02 0.96
## transactions 0.85 0.89 0.77 0.08 1.00 0.81 0.09
## revenue 0.77 0.79 0.68 0.02 0.81 1.00 0.06
## ctr -0.32 -0.30 -0.51 0.96 0.09 0.06 1.00
## con_rate -0.49 -0.44 -0.57 0.87 -0.04 -0.14 0.81
## con_rate
## spend -0.49
## clicks -0.44
## impressions -0.57
## display 0.87
## transactions -0.04
## revenue -0.14
## ctr 0.81
## con_rate 1.00
# Model 1: simple linear regression of revenue on spend
model1 <- lm(formula = revenue ~ spend, data = df)
model1 %>% summary()
##
## Call:
## lm(formula = revenue ~ spend, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -145.210 -54.647 1.117 67.780 149.476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.9397 37.9668 0.288 0.775
## spend 4.8066 0.7775 6.182 1.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 86.71 on 27 degrees of freedom
## Multiple R-squared: 0.586, Adjusted R-squared: 0.5707
## F-statistic: 38.22 on 1 and 27 DF, p-value: 1.311e-06
# Coefficient table for model 1 as a tibble (one row per term)
model1 %>% broom::tidy()
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 10.9 38.0 0.288 0.775
## 2 spend 4.81 0.778 6.18 0.00000131
# Scatter plot of revenue against spend with the fitted least-squares line
# and its confidence band
ggplot(data = df, mapping = aes(x = spend, y = revenue)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = TRUE,
    color = "purple"
  ) +
  labs(
    title = "Revenue vs. Spend",
    x = "Spend",
    y = "Revenue"
  )
## `geom_smooth()` using formula = 'y ~ x'
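Interpretation: The model shows a statistically significant positive relationship between spend and revenue (slope ≈ 4.81, p < 0.001): each additional unit of spend is associated with about 4.81 more units of revenue on average, and spend alone explains roughly 59% of the variance in revenue (R² = 0.586). Increasing spend is therefore likely to go along with increased revenue, although with observational data this is an association rather than proof of a causal effect.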
# Model 2: revenue regressed on spend plus the display indicator
model2 <- lm(formula = revenue ~ spend + display, data = df)
model2 %>% summary()
##
## Call:
## lm(formula = revenue ~ spend + display, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -176.730 -35.020 8.661 56.440 129.231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -50.8612 40.3336 -1.261 0.21850
## spend 5.5473 0.7415 7.482 6.07e-08 ***
## display 93.5856 33.1910 2.820 0.00908 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 77.33 on 26 degrees of freedom
## Multiple R-squared: 0.6829, Adjusted R-squared: 0.6586
## F-statistic: 28 on 2 and 26 DF, p-value: 3.271e-07
# Coefficient table for model 2 as a tibble (one row per term)
model2 %>% broom::tidy()
## # A tibble: 3 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -50.9 40.3 -1.26 0.219
## 2 spend 5.55 0.741 7.48 0.0000000607
## 3 display 93.6 33.2 2.82 0.00908
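Interpretation: Spend continues to be a significant predictor of revenue (slope ≈ 5.55, p < 0.001). The display coefficient is also significant (≈ 93.59, p = 0.009): holding spend constant, observations with display = 1 have about 93.6 more units of revenue on average than those with display = 0. Adding display raises the adjusted R² from 0.571 to 0.659, so the two-predictor model fits the data better than spend alone.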