📥 Load Packages and Data

# Install any required packages that are not yet available.
# requireNamespace() tests each package directly; R's documentation advises
# against scanning installed.packages() just to check whether a named package
# is present, because it reads metadata for every installed package.
packages <- c("tidyverse", "broom", "psych")
is_available <- vapply(packages, requireNamespace, logical(1), quietly = TRUE)
missing_packages <- packages[!is_available]
if (length(missing_packages) > 0) {
  # A single call lets install.packages() resolve shared dependencies once
  install.packages(missing_packages)
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Read Display_data.csv (path is relative to the working directory) into `df`,
# then preview each column's type and leading values
df <- read.csv(file = "Display_data.csv")
df %>%
  glimpse()
## Rows: 29
## Columns: 8
## $ spend        <dbl> 22.61, 37.28, 55.57, 45.42, 50.22, 33.05, 12.88, 23.87, 5…
## $ clicks       <int> 165, 228, 291, 247, 290, 172, 68, 112, 306, 300, 355, 533…
## $ impressions  <int> 8672, 11875, 14631, 11709, 14768, 8698, 2924, 5919, 14789…
## $ display      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ transactions <int> 2, 2, 3, 2, 3, 2, 1, 1, 3, 3, 4, 5, 2, 1, 4, 3, 5, 6, 4, …
## $ revenue      <dbl> 58.88, 44.92, 141.56, 209.76, 197.68, 204.36, 117.32, 72.…
## $ ctr          <dbl> 1.90, 1.92, 1.99, 2.11, 1.96, 1.98, 2.33, 1.89, 2.07, 2.0…
## $ con_rate     <dbl> 1.21, 0.88, 1.03, 0.81, 1.03, 1.16, 1.47, 0.89, 0.98, 1.0…

📊 Descriptive Statistics

# Per-column summary statistics (n, mean, sd, median, range, skew, kurtosis, se)
df %>%
  psych::describe()
##              vars  n     mean      sd  median  trimmed     mad     min      max
## spend           1 29    44.22   21.08   39.68    43.84   22.61    1.12    91.28
## clicks          2 29   257.14  129.23  241.00   248.60   97.85   48.00   593.00
## impressions     3 29 11857.59 6886.92 9934.00 11333.96 6265.47 1862.00 29324.00
## display         4 29     0.31    0.47    0.00     0.28    0.00    0.00     1.00
## transactions    5 29     2.97    1.30    3.00     2.92    1.48    1.00     6.00
## revenue         6 29   223.50  132.33  235.16   218.26  113.80   16.16   522.00
## ctr             7 29     2.31    0.48    2.02     2.27    0.12    1.89     3.29
## con_rate        8 29     1.23    0.35    1.13     1.20    0.28    0.81     2.08
##                 range skew kurtosis      se
## spend           90.16 0.29    -0.46    3.91
## clicks         545.00 0.74     0.21   24.00
## impressions  27462.00 0.81    -0.04 1278.87
## display          1.00 0.78    -1.44    0.09
## transactions     5.00 0.25    -0.64    0.24
## revenue        505.84 0.22    -0.68   24.57
## ctr              1.40 0.88    -0.99    0.09
## con_rate         1.27 0.77    -0.63    0.06
# Correlation matrix of all numeric columns (cor() default method), rounded to
# two decimals. select(where(...)) replaces the superseded select_if().
df %>%
  select(where(is.numeric)) %>%
  cor() %>%
  round(2)
##              spend clicks impressions display transactions revenue   ctr
## spend         1.00   0.97        0.94   -0.35         0.85    0.77 -0.32
## clicks        0.97   1.00        0.97   -0.33         0.89    0.79 -0.30
## impressions   0.94   0.97        1.00   -0.53         0.77    0.68 -0.51
## display      -0.35  -0.33       -0.53    1.00         0.08    0.02  0.96
## transactions  0.85   0.89        0.77    0.08         1.00    0.81  0.09
## revenue       0.77   0.79        0.68    0.02         0.81    1.00  0.06
## ctr          -0.32  -0.30       -0.51    0.96         0.09    0.06  1.00
## con_rate     -0.49  -0.44       -0.57    0.87        -0.04   -0.14  0.81
##              con_rate
## spend           -0.49
## clicks          -0.44
## impressions     -0.57
## display          0.87
## transactions    -0.04
## revenue         -0.14
## ctr              0.81
## con_rate         1.00

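📈 Hypotheses

H1: Higher spend is associated with higher revenue (tested by the simple regression of revenue on spend).
H2: Holding spend constant, revenue differs between display = 1 and display = 0 (tested by the multiple regression of revenue on spend and display).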


🔍 Simple Linear Regression: Revenue ~ Spend

# Fit a linear model of revenue on spend, then print the coefficient table,
# residual summary, and overall fit statistics
model1 <- lm(formula = revenue ~ spend, data = df)
summary(object = model1)
## 
## Call:
## lm(formula = revenue ~ spend, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -145.210  -54.647    1.117   67.780  149.476 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  10.9397    37.9668   0.288    0.775    
## spend         4.8066     0.7775   6.182 1.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86.71 on 27 degrees of freedom
## Multiple R-squared:  0.586,  Adjusted R-squared:  0.5707 
## F-statistic: 38.22 on 1 and 27 DF,  p-value: 1.311e-06
# Coefficient table for model1 as a tibble
# (term, estimate, std.error, statistic, p.value)
model1 %>%
  tidy()
## # A tibble: 2 × 5
##   term        estimate std.error statistic    p.value
##   <chr>          <dbl>     <dbl>     <dbl>      <dbl>
## 1 (Intercept)    10.9     38.0       0.288 0.775     
## 2 spend           4.81     0.778     6.18  0.00000131
# Scatter plot of revenue against spend, overlaid with a purple linear fit
# and its confidence band
ggplot(data = df, mapping = aes(x = spend, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm", color = "purple", se = TRUE) +
  labs(x = "Spend", y = "Revenue", title = "Revenue vs. Spend")
## `geom_smooth()` using formula = 'y ~ x'

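Interpretation: Spend is a statistically significant positive predictor of revenue (estimate = 4.81, SE = 0.78, p < 0.001): each additional unit of spend is associated with about 4.81 additional units of revenue, and spend alone explains roughly 59% of the variance in revenue (R² = 0.586). The intercept is not significantly different from zero (p = 0.775). This suggests that increasing spend is likely to be accompanied by higher revenue, although with only 29 observations the result should be read as an association rather than proof of a causal effect.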


🔁 Multiple Regression: Revenue ~ Spend + Display

# Fit a linear model of revenue on spend plus the display indicator, then print
# the coefficient table, residual summary, and overall fit statistics
model2 <- lm(formula = revenue ~ spend + display, data = df)
summary(object = model2)
## 
## Call:
## lm(formula = revenue ~ spend + display, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -176.730  -35.020    8.661   56.440  129.231 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -50.8612    40.3336  -1.261  0.21850    
## spend         5.5473     0.7415   7.482 6.07e-08 ***
## display      93.5856    33.1910   2.820  0.00908 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 77.33 on 26 degrees of freedom
## Multiple R-squared:  0.6829, Adjusted R-squared:  0.6586 
## F-statistic:    28 on 2 and 26 DF,  p-value: 3.271e-07
# Coefficient table for model2 as a tibble
# (term, estimate, std.error, statistic, p.value)
model2 %>%
  tidy()
## # A tibble: 3 × 5
##   term        estimate std.error statistic      p.value
##   <chr>          <dbl>     <dbl>     <dbl>        <dbl>
## 1 (Intercept)   -50.9     40.3       -1.26 0.219       
## 2 spend           5.55     0.741      7.48 0.0000000607
## 3 display        93.6     33.2        2.82 0.00908

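Interpretation: Spend continues to be a significant predictor of revenue after controlling for display (estimate = 5.55, SE = 0.74, p < 0.001). The display coefficient is also positive and statistically significant (estimate = 93.59, SE = 33.19, p = 0.009): holding spend constant, observations with display = 1 have about 93.6 more units of revenue on average than those with display = 0. Adding display raises adjusted R² from 0.571 to 0.659, so the model that includes display fits the data better than spend alone.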