# Read Display_data.csv from the working directory into a data frame
data <- read.csv(file = "Display_data.csv")
# Show each column's type together with its first few values
str(object = data)
## 'data.frame': 29 obs. of 8 variables:
## $ spend : num 22.6 37.3 55.6 45.4 50.2 ...
## $ clicks : int 165 228 291 247 290 172 68 112 306 300 ...
## $ impressions : int 8672 11875 14631 11709 14768 8698 2924 5919 14789 14818 ...
## $ display : int 0 0 0 0 0 0 0 0 0 0 ...
## $ transactions: int 2 2 3 2 3 2 1 1 3 3 ...
## $ revenue : num 58.9 44.9 141.6 209.8 197.7 ...
## $ ctr : num 1.9 1.92 1.99 2.11 1.96 1.98 2.33 1.89 2.07 2.02 ...
## $ con_rate : num 1.21 0.88 1.03 0.81 1.03 1.16 1.47 0.89 0.98 1 ...
# Simple linear regression: revenue is the response, spend the sole predictor
model_simple <- lm(formula = revenue ~ spend, data = data)
# Report the coefficient table, residual quantiles and fit statistics
summary(object = model_simple)
##
## Call:
## lm(formula = revenue ~ spend, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -145.210 -54.647 1.117 67.780 149.476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.9397 37.9668 0.288 0.775
## spend 4.8066 0.7775 6.182 1.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 86.71 on 27 degrees of freedom
## Multiple R-squared: 0.586, Adjusted R-squared: 0.5707
## F-statistic: 38.22 on 1 and 27 DF, p-value: 1.311e-06
# Scatter plot of revenue against spend, drawn with solid steel-blue points
plot(
  x = data$spend,
  y = data$revenue,
  col = "steelblue",
  pch = 19,
  xlab = "spend",
  ylab = "revenue",
  main = "Simple Linear Regression: spend vs revenue"
)
# Overlay the fitted line from model_simple in red at double line width
abline(reg = model_simple, lwd = 2, col = "red")

Multiple regression
# Load necessary libraries
library(ggplot2)
library(rgl)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, will use the null device.
## See '?rgl.useNULL' for ways to avoid this warning.
# Read Display_data.csv into `data`
data <- read.csv(file = "Display_data.csv")
# Multiple regression: revenue explained by clicks and impressions together
model <- lm(formula = revenue ~ clicks + impressions, data = data)
# Report the coefficient table, residual quantiles and fit statistics
summary(object = model)
##
## Call:
## lm(formula = revenue ~ clicks + impressions, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -134.682 -50.377 6.526 45.177 104.722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.765768 32.381796 -0.610 0.547
## clicks 2.120330 0.429706 4.934 4e-05 ***
## impressions -0.025465 0.008063 -3.158 0.004 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 72.18 on 26 degrees of freedom
## Multiple R-squared: 0.7237, Adjusted R-squared: 0.7025
## F-statistic: 34.05 on 2 and 26 DF, p-value: 5.463e-08
# Revenue against clicks: raw points plus a straight-line fit of revenue on
# clicks alone, drawn in blue without a confidence band
ggplot(data = data, mapping = aes(x = clicks, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Revenue vs Clicks with Regression Line",
    x = "Clicks",
    y = "Revenue"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Revenue against impressions: raw points plus a straight-line fit of revenue
# on impressions alone, drawn in blue without a confidence band
ggplot(data = data, mapping = aes(x = impressions, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Revenue vs Impressions with Regression Line",
    x = "Impressions",
    y = "Revenue"
  )
## `geom_smooth()` using formula = 'y ~ x'

# 3D scatter plot of revenue over clicks and impressions, drawn as blue spheres.
# Axis labels are given explicitly; without them rgl labels the axes with the
# deparsed argument expressions (e.g. "data$clicks").
plot3d(
  data$clicks, data$impressions, data$revenue,
  xlab = "Clicks", ylab = "Impressions", zlab = "Revenue",
  type = "s", col = "blue", size = 3
)
# Regression plane: predicted revenue at clicks = x and impressions = y,
# computed from the coefficients of the fitted `model` (intercept first, then
# the two slopes in formula order). Operates element-wise on vectors, which is
# what outer() needs from its FUN argument.
plane <- function(x, y) {
  betas <- coef(model)
  betas[1] + betas[2] * x + betas[3] * y
}
# Evenly spaced grid, 30 points per axis, spanning the observed range of
# clicks (x) and impressions (y)
x_vals <- seq(
  from = min(data$clicks),
  to = max(data$clicks),
  length.out = 30
)
y_vals <- seq(
  from = min(data$impressions),
  to = max(data$impressions),
  length.out = 30
)
# Evaluate plane() at every (clicks, impressions) grid combination
z_vals <- outer(X = x_vals, Y = y_vals, FUN = plane)
# Draw the resulting surface in half-transparent red on the open 3D plot
surface3d(x_vals, y_vals, z_vals, col = "red", alpha = 0.5)
Question 2
Regression analysis on A/B testing
# Read ab_testing1.csv from the working directory into a data frame
ab_data <- read.csv(file = "ab_testing1.csv")
# Preview the first rows (optional)
head(x = ab_data)
## Ads Purchase
## 1 1 152
## 2 0 21
## 3 2 77
## 4 0 65
## 5 1 183
## 6 1 87
# Ads is a group code rather than a quantity: store it as a factor so lm()
# fits one coefficient per non-baseline group
ab_data[["Ads"]] <- as.factor(ab_data[["Ads"]])
# Regress Purchase on the Ads groups; each Ads coefficient is that group's
# difference from the baseline level captured by the intercept
ab_model <- lm(formula = Purchase ~ Ads, data = ab_data)
# Report the coefficient table, residual quantiles and fit statistics
summary(object = ab_model)
##
## Call:
## lm(formula = Purchase ~ Ads, data = ab_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -59.75 -22.75 -3.75 30.25 64.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.00 10.21 4.800 5.69e-05 ***
## Ads1 69.71 15.91 4.383 0.000171 ***
## Ads2 24.75 13.82 1.791 0.084982 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.28 on 26 degrees of freedom
## Multiple R-squared: 0.4262, Adjusted R-squared: 0.3821
## F-statistic: 9.656 on 2 and 26 DF, p-value: 0.0007308