1. Install and Load
Required Packages
# Packages this analysis depends on.
required_packages <- c(
  "ggplot2", "dplyr", "readr", "psych", "car",
  "corrplot", "GGally", "broom", "kableExtra", "plotly"
)

# Install only what is missing: an unconditional install.packages() call
# re-downloads every package on each run and fails without network access
# even when all dependencies are already present.
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(ggplot2)
library(dplyr)
library(readr)
library(psych)
library(car)
library(corrplot)
library(GGally)
library(broom)
library(kableExtra)
library(plotly)
2. Load the
Dataset
# Read the site-level data from the working directory.
tech_sites <- read.csv(file = "tech_sites.csv")

# Show the first six rows as a compact styled table.
kable_styling(
  kable(head(tech_sites, n = 6)),
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed")
)
Site
|
Session_Duration
|
Scroll_Depth
|
Click_Count
|
Load_Time
|
JS_Error_Count
|
Engagement_Index
|
Converted
|
google.com.au
|
209.8028
|
90.96691
|
6
|
2.546093
|
2
|
81.06088
|
0
|
youtube.com
|
171.7041
|
82.39004
|
5
|
1.730205
|
0
|
97.73036
|
0
|
facebook.com
|
218.8613
|
71.36253
|
8
|
3.097760
|
0
|
66.29973
|
0
|
amazon.com.au
|
271.3818
|
26.73120
|
1
|
2.203746
|
2
|
77.67609
|
1
|
ebay.com.au
|
165.9508
|
32.93030
|
3
|
1.566051
|
1
|
71.39813
|
0
|
abc.net.au
|
165.9518
|
91.88434
|
4
|
2.243131
|
3
|
57.60156
|
0
|
3. Descriptive
Statistics
# Descriptive statistics for every column, rendered as a styled table.
# The non-numeric Site column appears in the output as "Site*".
kable_styling(
  kable(describe(tech_sites)),
  full_width = FALSE,
  bootstrap_options = c("striped", "condensed")
)
|
vars
|
n
|
mean
|
sd
|
median
|
trimmed
|
mad
|
min
|
max
|
range
|
skew
|
kurtosis
|
se
|
Site*
|
1
|
150
|
74.853333
|
43.0491192
|
74.500000
|
74.816667
|
54.8562000
|
1.0000000
|
149.000000
|
148.000000
|
0.0092630
|
-1.2178160
|
3.5149459
|
Session_Duration
|
2
|
150
|
175.060388
|
56.5405334
|
176.764915
|
174.496430
|
50.8403626
|
22.8152938
|
327.794527
|
304.979233
|
0.0467203
|
-0.1802758
|
4.6165152
|
Scroll_Depth
|
3
|
150
|
60.149804
|
23.2372975
|
62.879590
|
60.381821
|
28.8385686
|
20.4049267
|
99.204308
|
78.799381
|
-0.1188487
|
-1.1814721
|
1.8973174
|
Click_Count
|
4
|
150
|
5.960000
|
2.4271396
|
6.000000
|
5.883333
|
2.9652000
|
1.0000000
|
12.000000
|
11.000000
|
0.3036916
|
-0.3509675
|
0.1981751
|
Load_Time
|
5
|
150
|
2.549534
|
0.6244057
|
2.562873
|
2.547124
|
0.6733554
|
0.7622468
|
3.963851
|
3.201605
|
-0.0554327
|
-0.1735574
|
0.0509825
|
JS_Error_Count
|
6
|
150
|
1.480000
|
1.1742197
|
1.000000
|
1.408333
|
1.4826000
|
0.0000000
|
5.000000
|
5.000000
|
0.4676849
|
-0.5383367
|
0.0958746
|
Engagement_Index
|
7
|
150
|
80.273776
|
12.5167799
|
79.706707
|
80.232230
|
12.4177511
|
44.9437942
|
118.317291
|
73.373497
|
0.0111105
|
0.3105759
|
1.0219908
|
Converted
|
8
|
150
|
0.240000
|
0.4285139
|
0.000000
|
0.175000
|
0.0000000
|
0.0000000
|
1.000000
|
1.000000
|
1.2054063
|
-0.5505329
|
0.0349880
|
4. Correlation
Matrix
# Pairwise correlations between the six numeric metrics.
cor_matrix <- cor(
  tech_sites[c(
    "Session_Duration", "Scroll_Depth", "Click_Count",
    "Load_Time", "JS_Error_Count", "Engagement_Index"
  )]
)

# Upper-triangle heat map with the coefficients printed in each cell.
corrplot(
  cor_matrix,
  type = "upper",
  method = "color",
  addCoef.col = "black",
  tl.cex = 0.8
)

5. Visualizations
5.1 Interactive
Site Scatter Plot
library(plotly)

# Interactive scatter of Engagement_Index against Scroll_Depth, one marker
# per row; hovering a marker shows the site name and three of its metrics.
plot_ly(
  data = tech_sites,
  type = "scatter",
  mode = "markers",
  x = ~Scroll_Depth,
  y = ~Engagement_Index,
  hoverinfo = "text",
  text = ~paste(
    "Site:", Site,
    "<br>Session Duration:", round(Session_Duration, 1),
    "<br>Clicks:", Click_Count,
    "<br>Load Time:", round(Load_Time, 2)
  ),
  marker = list(
    size = 10,
    color = "rgba(0,123,255,0.7)",
    line = list(color = "rgba(0,0,0,0.5)", width = 1)
  )
) %>%
  layout(
    title = "Interactive Engagement vs Scroll Depth by Site",
    xaxis = list(title = "Scroll Depth (%)"),
    yaxis = list(title = "Engagement Index")
  )
5.2 Engagement Index
by Conversion
# Box plot of Engagement_Index for each value of Converted.
# Converted is turned into a factor so it is treated as discrete groups.
ggplot(
  tech_sites,
  aes(
    x = as.factor(Converted),
    y = Engagement_Index,
    fill = as.factor(Converted)
  )
) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = c("#FF9999", "#66CC99")) +
  labs(
    x = "Converted",
    y = "Engagement Index",
    # Without an explicit label the legend title is the raw expression
    # "as.factor(Converted)".
    fill = "Converted",
    title = "Engagement Index by Conversion"
  ) +
  theme_minimal()

5.4 Histogram of Page
Load Time
# Histogram of Load_Time using 20 bins.
ggplot(data = tech_sites, mapping = aes(x = Load_Time)) +
  geom_histogram(bins = 20, color = "white", fill = "#008080") +
  labs(
    x = "Load Time (sec)",
    y = "Frequency",
    title = "Distribution of Page Load Time"
  ) +
  theme_light()

5.5 Engagement Index
Density Plot
# Overlaid density curves of Engagement_Index, one per value of Converted.
ggplot(
  tech_sites,
  aes(x = Engagement_Index, fill = as.factor(Converted))
) +
  geom_density(alpha = 0.6) +
  scale_fill_manual(values = c("tomato", "darkgreen")) +
  labs(
    title = "Engagement Index Density by Conversion",
    x = "Engagement Index",
    # Without an explicit label the legend title is the raw expression
    # "as.factor(Converted)".
    fill = "Converted"
  ) +
  theme_minimal()

6. Regression
Analysis
# Fit a linear model of Engagement_Index on the five other numeric metrics
# (Session_Duration, Scroll_Depth, Click_Count, Load_Time, JS_Error_Count).
# The fitted object is kept in `model`.
model <- lm(Engagement_Index ~ Session_Duration + Scroll_Depth + Click_Count +
Load_Time + JS_Error_Count, data = tech_sites)
# Print coefficients, residual quantiles, R-squared and the overall F-test.
summary(model)
##
## Call:
## lm(formula = Engagement_Index ~ Session_Duration + Scroll_Depth +
## Click_Count + Load_Time + JS_Error_Count, data = tech_sites)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.85 -8.32 0.01 7.67 39.15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 85.680102 6.650312 12.884 <2e-16 ***
## Session_Duration -0.007034 0.018485 -0.381 0.704
## Scroll_Depth -0.045006 0.045253 -0.995 0.322
## Click_Count -0.061988 0.430187 -0.144 0.886
## Load_Time -0.570009 1.666772 -0.342 0.733
## JS_Error_Count 0.239775 0.897047 0.267 0.790
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.68 on 144 degrees of freedom
## Multiple R-squared: 0.008625, Adjusted R-squared: -0.0258
## F-statistic: 0.2506 on 5 and 144 DF, p-value: 0.939
7. Grouped Summary by
Conversion
# Engagement and load-time statistics for each value of Converted,
# rendered as a striped table.
kable_styling(
  kable(
    summarise(
      group_by(tech_sites, Converted),
      Mean_Engagement = mean(Engagement_Index),
      Median_Engagement = median(Engagement_Index),
      SD_Engagement = sd(Engagement_Index),
      Mean_LoadTime = mean(Load_Time)
    )
  ),
  full_width = FALSE,
  bootstrap_options = "striped"
)
Converted
|
Mean_Engagement
|
Median_Engagement
|
SD_Engagement
|
Mean_LoadTime
|
0
|
81.45856
|
81.85608
|
12.65907
|
2.549729
|
1
|
76.52195
|
77.41908
|
11.42496
|
2.548918
|
8. Top 10 Converting
Sites
# The ten rows with Converted == 1 that have the highest Engagement_Index,
# highest first, showing only the site name and three metrics.
tech_sites %>%
  filter(Converted == 1) %>%
  select(Site, Engagement_Index, Session_Duration, Scroll_Depth) %>%
  arrange(desc(Engagement_Index)) %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling(full_width = FALSE, bootstrap_options = "striped")
Site
|
Engagement_Index
|
Session_Duration
|
Scroll_Depth
|
seek.com.au
|
102.24911
|
152.0562
|
32.86464
|
my.gov.au
|
101.26702
|
116.5373
|
60.21097
|
skyscanner.com.au
|
93.97687
|
141.2928
|
76.19873
|
zip.co/au
|
93.78135
|
178.4092
|
41.66658
|
drive.com.au
|
90.60854
|
235.8768
|
45.35376
|
airtasker.com.au
|
85.14580
|
185.2228
|
60.90739
|
maccas.com.au
|
82.33261
|
195.5930
|
33.85615
|
menulog.com.au
|
81.47204
|
180.3068
|
81.59948
|
commsec.com.au
|
81.35924
|
183.6138
|
55.11771
|
open.edu.au
|
80.81427
|
227.4619
|
32.51496
|
9. Export Summary Stats
to CSV
# Mean session duration, click count and load time per value of Converted.
summary_stats <- summarise(
  group_by(tech_sites, Converted),
  Mean_Session = mean(Session_Duration),
  Mean_Clicks = mean(Click_Count),
  Mean_Load = mean(Load_Time)
)

# Write the summary to the working directory without a row-name column.
write.csv(
  summary_stats,
  file = "summary_by_conversion.csv",
  row.names = FALSE
)