library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# ---- Simple regression: load and clean the display-advertising data ----
# Read the campaign data; readr reports the parsed column types on load.
data <- read_csv("Display_data.csv")
## Rows: 29 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): spend, clicks, impressions, display, transactions, revenue, ctr, co...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Lower-case the column names so the model formulas below can refer to them
# consistently, then drop every row that contains a missing value. Both steps
# are idempotent, so they are applied once (the earlier version repeated them).
colnames(data) <- tolower(colnames(data))
data <- na.omit(data)

# Save the descriptive statistics to disk. capture.output() redirects console
# output only for the duration of the call and restores it even when printing
# fails, which a bare sink()/sink() pair does not guarantee.
capture.output(print(summary(data)), file = "summary_statistics.txt")
# ---- Simple regression: revenue explained by spend ----
# Fit an ordinary least-squares model with spend as the only predictor.
simple_model <- lm(revenue ~ spend, data = data)

# Write the full model summary to a text file. capture.output() restores the
# console on exit, so an error while printing cannot leave output diverted
# the way an unmatched sink() would.
capture.output(
  print(summary(simple_model)),
  file = "simple_regression_results.txt"
)

# Scatter plot of the raw observations with the fitted regression line.
ggplot(data, aes(x = spend, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm", col = "blue") +
  labs(
    title = "Simple Regression: Revenue vs. Spend",
    x = "Spend",
    y = "Revenue"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# ---- Simple regression: headline statistics ----
# Summarise the fitted model once and reuse the result for every statistic.
model_summary <- summary(simple_model)
coefficients <- model_summary$coefficients
r_squared <- model_summary$r.squared

# Look the values up by row/column name rather than by position, so the code
# states which cell of the coefficient table it is reading.
p_value <- coefficients["spend", "Pr(>|t|)"]
slope <- coefficients["spend", "Estimate"]

cat("Regression Coefficient (Slope): ", round(slope, 4), "\n")
## Regression Coefficient (Slope): 4.8066
cat("R-squared Value: ", round(r_squared, 4), "\n")
## R-squared Value: 0.586
# Rounding a very small p-value to 4 decimals printed a misleading "0" in an
# earlier run; format.pval() keeps significant digits (scientific notation)
# instead.
cat("P-value for 'Spend': ", format.pval(p_value, digits = 4), "\n")
library(ggplot2)
library(dplyr)
library(readr)
library(stargazer)
# Please use the dataset “Display_data.csv” for this question.
# ---- Multiple regression: load and prepare the display-advertising data ----
# Re-read the campaign data from disk so this section starts from the raw file.
data <- read_csv("Display_data.csv")
## Rows: 29 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): spend, clicks, impressions, display, transactions, revenue, ctr, co...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show the column names exactly as they appear in the file.
print(names(data))
## [1] "spend" "clicks" "impressions" "display" "transactions"
## [6] "revenue" "ctr" "con_rate"

# Lower-case the names, then turn the display indicator into a factor so that
# lm() treats it as a categorical predictor.
names(data) <- tolower(names(data))
data[["display"]] <- as.factor(data[["display"]])

# Drop incomplete rows, announcing it only when there is something to drop.
if (anyNA(data)) {
  cat("Missing values found! Removing them...\n")
  data <- na.omit(data)
}
# ---- Multiple regression: descriptive statistics ----
# Show the summary on the console and save the same table to disk. The summary
# is computed once and printed explicitly: a bare `summary(data)` only appears
# when top-level values are auto-printed, so under source() the header below
# would otherwise be followed by nothing.
cat("\n--- Data Summary ---\n")
##
## --- Data Summary ---
data_summary <- summary(data)
print(data_summary)
## spend clicks impressions display transactions
## Min. : 1.12 Min. : 48.0 Min. : 1862 0:20 Min. :1.000
## 1st Qu.:28.73 1st Qu.:172.0 1st Qu.: 6048 1: 9 1st Qu.:2.000
## Median :39.68 Median :241.0 Median : 9934 Median :3.000
## Mean :44.22 Mean :257.1 Mean :11858 Mean :2.966
## 3rd Qu.:55.57 3rd Qu.:303.0 3rd Qu.:14789 3rd Qu.:4.000
## Max. :91.28 Max. :593.0 Max. :29324 Max. :6.000
## revenue ctr con_rate
## Min. : 16.16 Min. :1.890 Min. :0.810
## 1st Qu.:117.32 1st Qu.:1.970 1st Qu.:0.990
## Median :235.16 Median :2.020 Median :1.130
## Mean :223.50 Mean :2.306 Mean :1.227
## 3rd Qu.:298.92 3rd Qu.:2.790 3rd Qu.:1.470
## Max. :522.00 Max. :3.290 Max. :2.080

# capture.output() restores the console even if printing fails, unlike a bare
# sink()/sink() pair. The target file is overwritten on every run.
capture.output(print(data_summary), file = "summary_statistics.txt")
# ---- Multiple regression: revenue explained by spend and display ----
cat("\n--- Running Multiple Regression Model: revenue ~ spend + display ---\n")
##
## --- Running Multiple Regression Model: revenue ~ spend + display ---

# Fit the model, then summarise it once so the console output and the saved
# file are produced from the same object.
multiple_model <- lm(revenue ~ spend + display, data = data)
multiple_summary <- summary(multiple_model)
print(multiple_summary)
##
## Call:
## lm(formula = revenue ~ spend + display, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -176.730 -35.020 8.661 56.440 129.231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -50.8612 40.3336 -1.261 0.21850
## spend 5.5473 0.7415 7.482 6.07e-08 ***
## display1 93.5856 33.1910 2.820 0.00908 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 77.33 on 26 degrees of freedom
## Multiple R-squared: 0.6829, Adjusted R-squared: 0.6586
## F-statistic: 28 on 2 and 26 DF, p-value: 3.271e-07

# Save the same summary to disk. capture.output() restores the console even
# if printing fails, which an unmatched sink() would not.
capture.output(
  print(multiple_summary),
  file = "multiple_regression_results.txt"
)
# ---- Multiple regression: visualisation ----
cat("\n--- Generating Visualization ---\n")
##
## --- Generating Visualization ---

# Revenue against spend, coloured by the display factor, with one fitted
# straight line per factor level and no confidence band.
ggplot(data, aes(x = spend, y = revenue, color = display)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Multiple Regression: Revenue vs. Spend & Display",
    x = "Spend",
    y = "Revenue",
    color = "Display Campaign (0 = No, 1 = Yes)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Please use the dataset “ab_testing1.csv” for this question.
library(ggplot2)
library(dplyr)
library(readr)
library(stargazer)
library(ggplot2)
library(dplyr)
library(readr)
library(stargazer)
# ---- A/B test: load and prepare the experiment data ----
# Read the experiment results; readr reports the parsed column types on load.
data <- read_csv("ab_testing1.csv")
## Rows: 29 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Ads, Purchase
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(colnames(data))
## [1] "Ads" "Purchase"

# Fail fast when either required column is missing. all(... %in% ...) yields a
# single TRUE/FALSE, which is what `if` expects (the elementwise `&` operator
# is meant for vector masks, not scalar conditions).
if (!all(c("Ads", "Purchase") %in% colnames(data))) {
  stop("Error: 'Ads' or 'Purchase' column not found in the dataset.")
}

# Treat the ad variant as a categorical variable, then drop incomplete rows.
data$Ads <- as.factor(data$Ads)
data <- na.omit(data)
# ---- A/B test: regression of purchases on ad variant ----
# Regress Purchase on Ads using ordinary least squares.
model <- lm(Purchase ~ Ads, data = data)

# Write the model summary to disk. capture.output() restores the console even
# if printing fails, which an unmatched sink() would not.
capture.output(print(summary(model)), file = "regression_results.txt")
# ---- A/B test: visualisation ----
# Box plot of Purchase for each value of Ads, filled by Ads.
ggplot(data, aes(x = Ads, y = Purchase, fill = Ads)) +
  geom_boxplot() +
  labs(
    title = "Impact of Ads on Product Purchase",
    x = "Advertisement Version",
    y = "Number of Purchases"
  ) +
  theme_minimal()
