1. Hypotheses

2. Load Data

# Read "Display_data.csv" (path is relative to the working directory)
# into a data frame named `df`, which the later steps operate on.
df <- read.csv(file = "Display_data.csv")

# Print the first six rows as a quick check of the columns that were read.
head(df, n = 6L)
##   spend clicks impressions display transactions revenue  ctr con_rate
## 1 22.61    165        8672       0            2   58.88 1.90     1.21
## 2 37.28    228       11875       0            2   44.92 1.92     0.88
## 3 55.57    291       14631       0            3  141.56 1.99     1.03
## 4 45.42    247       11709       0            2  209.76 2.11     0.81
## 5 50.22    290       14768       0            3  197.68 1.96     1.03
## 6 33.05    172        8698       0            2  204.36 1.98     1.16

3. Summary Statistics

# Print per-column descriptive statistics (min, quartiles, median, mean, max)
# for every column of `df`.
summary(df)
##      spend           clicks       impressions       display      
##  Min.   : 1.12   Min.   : 48.0   Min.   : 1862   Min.   :0.0000  
##  1st Qu.:28.73   1st Qu.:172.0   1st Qu.: 6048   1st Qu.:0.0000  
##  Median :39.68   Median :241.0   Median : 9934   Median :0.0000  
##  Mean   :44.22   Mean   :257.1   Mean   :11858   Mean   :0.3103  
##  3rd Qu.:55.57   3rd Qu.:303.0   3rd Qu.:14789   3rd Qu.:1.0000  
##  Max.   :91.28   Max.   :593.0   Max.   :29324   Max.   :1.0000  
##   transactions      revenue            ctr           con_rate    
##  Min.   :1.000   Min.   : 16.16   Min.   :1.890   Min.   :0.810  
##  1st Qu.:2.000   1st Qu.:117.32   1st Qu.:1.970   1st Qu.:0.990  
##  Median :3.000   Median :235.16   Median :2.020   Median :1.130  
##  Mean   :2.966   Mean   :223.50   Mean   :2.306   Mean   :1.227  
##  3rd Qu.:4.000   3rd Qu.:298.92   3rd Qu.:2.790   3rd Qu.:1.470  
##  Max.   :6.000   Max.   :522.00   Max.   :3.290   Max.   :2.080

4. Simple Linear Regression: Revenue ~ Spend

# Fit a linear model with `revenue` as the response and `spend` as the only
# predictor, using the columns of `df`. The fitted object is kept in
# `model_simple` so it can be reused later (e.g. to draw the fitted line).
model_simple <- lm(revenue ~ spend, data = df)
# Print the coefficient table, residual summary, R-squared and F-test.
summary(model_simple)
## 
## Call:
## lm(formula = revenue ~ spend, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -145.210  -54.647    1.117   67.780  149.476 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  10.9397    37.9668   0.288    0.775    
## spend         4.8066     0.7775   6.182 1.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86.71 on 27 degrees of freedom
## Multiple R-squared:  0.586,  Adjusted R-squared:  0.5707 
## F-statistic: 38.22 on 1 and 27 DF,  p-value: 1.311e-06
# Visualization: scatter plot of revenue against spend, with the line
# fitted by `model_simple` drawn on top.
plot(
  x = df[["spend"]],
  y = df[["revenue"]],
  pch = 19,
  col = "pink",
  xlab = "Spend",
  ylab = "Revenue",
  main = "Revenue vs Spend"
)
# Overlay the regression line taken from the fitted model's coefficients.
abline(model_simple, lwd = 2, col = "green")

5. Multiple Regression: Revenue ~ Spend + Display

# Fit a linear model with `revenue` as the response and two predictors,
# `spend` and `display`, using the columns of `df`.
# NOTE(review): `display` looks like a 0/1 indicator in the data preview —
# confirm its meaning before interpreting its coefficient.
model_multi <- lm(revenue ~ spend + display, data = df)
# Print the coefficient table, residual summary, R-squared and F-test.
summary(model_multi)
## 
## Call:
## lm(formula = revenue ~ spend + display, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -176.730  -35.020    8.661   56.440  129.231 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -50.8612    40.3336  -1.261  0.21850    
## spend         5.5473     0.7415   7.482 6.07e-08 ***
## display      93.5856    33.1910   2.820  0.00908 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 77.33 on 26 degrees of freedom
## Multiple R-squared:  0.6829, Adjusted R-squared:  0.6586 
## F-statistic:    28 on 2 and 26 DF,  p-value: 3.271e-07

6. Interpretation

7. Recommendations