# Read the display-advertising data from the working directory
data <- read.csv(file = "Display_data.csv")

# Inspect column names, storage types, and leading values
str(object = data)
## 'data.frame':    29 obs. of  8 variables:
##  $ spend       : num  22.6 37.3 55.6 45.4 50.2 ...
##  $ clicks      : int  165 228 291 247 290 172 68 112 306 300 ...
##  $ impressions : int  8672 11875 14631 11709 14768 8698 2924 5919 14789 14818 ...
##  $ display     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ transactions: int  2 2 3 2 3 2 1 1 3 3 ...
##  $ revenue     : num  58.9 44.9 141.6 209.8 197.7 ...
##  $ ctr         : num  1.9 1.92 1.99 2.11 1.96 1.98 2.33 1.89 2.07 2.02 ...
##  $ con_rate    : num  1.21 0.88 1.03 0.81 1.03 1.16 1.47 0.89 0.98 1 ...
# Simple linear regression: revenue explained by spend alone
model_simple <- lm(formula = revenue ~ spend, data = data)

# Report coefficient estimates, residual spread, and goodness of fit
summary(object = model_simple)
## 
## Call:
## lm(formula = revenue ~ spend, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -145.210  -54.647    1.117   67.780  149.476 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  10.9397    37.9668   0.288    0.775    
## spend         4.8066     0.7775   6.182 1.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 86.71 on 27 degrees of freedom
## Multiple R-squared:  0.586,  Adjusted R-squared:  0.5707 
## F-statistic: 38.22 on 1 and 27 DF,  p-value: 1.311e-06
# Scatter plot of revenue against spend (solid steel-blue points)
with(
  data,
  plot(
    spend, revenue,
    main = "Simple Linear Regression: spend vs revenue",
    xlab = "spend",
    ylab = "revenue",
    pch = 19,
    col = "steelblue"
  )
)

# Overlay the fitted line from model_simple in red, double width
abline(reg = model_simple, col = "red", lwd = 2)

Multiple regression

# Load necessary libraries
library(ggplot2)
library(rgl)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, will use the null device.
## See '?rgl.useNULL' for ways to avoid this warning.
# Read the display-advertising data (same file as the simple regression)
data <- read.csv(file = "Display_data.csv")

# Multiple regression: revenue explained jointly by clicks and impressions
model <- lm(formula = revenue ~ clicks + impressions, data = data)

# Report coefficient estimates, significance tests, and fit statistics
summary(object = model)
## 
## Call:
## lm(formula = revenue ~ clicks + impressions, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -134.682  -50.377    6.526   45.177  104.722 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -19.765768  32.381796  -0.610    0.547    
## clicks        2.120330   0.429706   4.934    4e-05 ***
## impressions  -0.025465   0.008063  -3.158    0.004 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 72.18 on 26 degrees of freedom
## Multiple R-squared:  0.7237, Adjusted R-squared:  0.7025 
## F-statistic: 34.05 on 2 and 26 DF,  p-value: 5.463e-08
# Revenue against clicks, with a straight least-squares line and no
# confidence band
ggplot(data = data, mapping = aes(x = clicks, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  ggtitle("Revenue vs Clicks with Regression Line") +
  xlab("Clicks") +
  ylab("Revenue")
## `geom_smooth()` using formula = 'y ~ x'

# Revenue against impressions, with a straight least-squares line and no
# confidence band
ggplot(data = data, mapping = aes(x = impressions, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  ggtitle("Revenue vs Impressions with Regression Line") +
  xlab("Impressions") +
  ylab("Revenue")
## `geom_smooth()` using formula = 'y ~ x'

# 3D scatter plot of revenue against clicks and impressions, drawn as spheres.
# Axis labels are given explicitly, matching the other plots in this file;
# without them rgl labels each axis with the argument expression
# (e.g. "data$clicks").
plot3d(
  data$clicks, data$impressions, data$revenue,
  xlab = "Clicks", ylab = "Impressions", zlab = "Revenue",
  type = "s", col = "blue", size = 3
)

# Regression plane built from the coefficients of the fitted `model`.
#   x: clicks value(s); y: impressions value(s).
# Returns first coefficient + second coefficient * x + third coefficient * y,
# evaluated element-wise. `model` is a free variable looked up when the
# function is called; the coefficient order (intercept, clicks, impressions)
# follows the revenue ~ clicks + impressions fit in this section.
plane <- function(x, y) {
  beta <- coef(model)
  beta[1] + beta[2] * x + beta[3] * y
}

# Span the observed range of each predictor with a 30-point axis
clicks_range <- range(data$clicks)
impressions_range <- range(data$impressions)
x_vals <- seq(clicks_range[1], clicks_range[2], length.out = 30)
y_vals <- seq(impressions_range[1], impressions_range[2], length.out = 30)

# Evaluate the fitted plane at every (clicks, impressions) grid node
z_vals <- outer(x_vals, y_vals, FUN = plane)

# Draw the plane as a half-transparent red surface on the 3D plot
surface3d(x_vals, y_vals, z_vals, alpha = 0.5, col = "red")

Question 2

Regression analysis on A/B testing

# Read the A/B-testing data from the working directory
ab_data <- read.csv(file = "ab_testing1.csv")

# Preview the first six rows (optional sanity check)
head(ab_data, n = 6L)
##   Ads Purchase
## 1   1      152
## 2   0       21
## 3   2       77
## 4   0       65
## 5   1      183
## 6   1       87
# Ads holds group codes rather than quantities, so store it as a factor
ab_data[["Ads"]] <- as.factor(ab_data[["Ads"]])

# Regress Purchase on the Ads factor to compare the groups
ab_model <- lm(formula = Purchase ~ Ads, data = ab_data)

# Report coefficient estimates and fit statistics
summary(object = ab_model)
## 
## Call:
## lm(formula = Purchase ~ Ads, data = ab_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -59.75 -22.75  -3.75  30.25  64.29 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    49.00      10.21   4.800 5.69e-05 ***
## Ads1           69.71      15.91   4.383 0.000171 ***
## Ads2           24.75      13.82   1.791 0.084982 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.28 on 26 degrees of freedom
## Multiple R-squared:  0.4262, Adjusted R-squared:  0.3821 
## F-statistic: 9.656 on 2 and 26 DF,  p-value: 0.0007308