1. Install and Load Required Packages

# Install only the packages that are not already available, so that re-running
# the analysis does not re-download and reinstall every dependency each time.
required_packages <- c(
  "ggplot2", "dplyr", "readr", "psych", "car", "corrplot", "GGally",
  "broom", "kableExtra", "plotly"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(ggplot2)
library(dplyr)
library(readr)
library(psych)
library(car)
library(corrplot)
library(GGally)
library(broom)
library(kableExtra)
library(plotly)

2. Load the Dataset

# Read the site-level metrics from a CSV file in the working directory.
tech_sites <- read.csv(file = "tech_sites.csv")

# Show the first rows (head()'s default count) as a styled, non-full-width
# table.
preview_rows <- head(tech_sites)
kable_styling(
  kable(preview_rows),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)
Site Session_Duration Scroll_Depth Click_Count Load_Time JS_Error_Count Engagement_Index Converted
google.com.au 209.8028 90.96691 6 2.546093 2 81.06088 0
youtube.com 171.7041 82.39004 5 1.730205 0 97.73036 0
facebook.com 218.8613 71.36253 8 3.097760 0 66.29973 0
amazon.com.au 271.3818 26.73120 1 2.203746 2 77.67609 1
ebay.com.au 165.9508 32.93030 3 1.566051 1 71.39813 0
abc.net.au 165.9518 91.88434 4 2.243131 3 57.60156 0

3. Descriptive Statistics

# Descriptive statistics for the whole data frame, produced by describe() and
# rendered as a compact striped table.
descriptive_table <- describe(tech_sites)
kable_styling(
  kable(descriptive_table),
  bootstrap_options = c("striped", "condensed"),
  full_width = FALSE
)
vars n mean sd median trimmed mad min max range skew kurtosis se
Site* 1 150 74.853333 43.0491192 74.500000 74.816667 54.8562000 1.0000000 149.000000 148.000000 0.0092630 -1.2178160 3.5149459
Session_Duration 2 150 175.060388 56.5405334 176.764915 174.496430 50.8403626 22.8152938 327.794527 304.979233 0.0467203 -0.1802758 4.6165152
Scroll_Depth 3 150 60.149804 23.2372975 62.879590 60.381821 28.8385686 20.4049267 99.204308 78.799381 -0.1188487 -1.1814721 1.8973174
Click_Count 4 150 5.960000 2.4271396 6.000000 5.883333 2.9652000 1.0000000 12.000000 11.000000 0.3036916 -0.3509675 0.1981751
Load_Time 5 150 2.549534 0.6244057 2.562873 2.547124 0.6733554 0.7622468 3.963851 3.201605 -0.0554327 -0.1735574 0.0509825
JS_Error_Count 6 150 1.480000 1.1742197 1.000000 1.408333 1.4826000 0.0000000 5.000000 5.000000 0.4676849 -0.5383367 0.0958746
Engagement_Index 7 150 80.273776 12.5167799 79.706707 80.232230 12.4177511 44.9437942 118.317291 73.373497 0.0111105 0.3105759 1.0219908
Converted 8 150 0.240000 0.4285139 0.000000 0.175000 0.0000000 0.0000000 1.000000 1.000000 1.2054063 -0.5505329 0.0349880

4. Correlation Matrix

# Pairwise correlations among the six numeric site metrics.
metric_columns <- c(
  "Session_Duration", "Scroll_Depth", "Click_Count",
  "Load_Time", "JS_Error_Count", "Engagement_Index"
)
cor_matrix <- cor(tech_sites[, metric_columns])

# Upper-triangle colour map with each coefficient printed in black.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  tl.cex = 0.8,
  addCoef.col = "black"
)

5. Visualizations

5.1 Interactive Company Scatter Plot

library(plotly)

# Marker appearance shared by every point: semi-transparent blue fill with a
# thin dark outline.
point_style <- list(
  size = 10,
  color = "rgba(0,123,255,0.7)",
  line = list(color = "rgba(0,0,0,0.5)", width = 1)
)

# One marker per row of tech_sites; the hover box shows only the custom text
# (site name, session duration, clicks and load time).
engagement_scatter <- plot_ly(
  data = tech_sites,
  x = ~Scroll_Depth,
  y = ~Engagement_Index,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "Site:", Site,
    "<br>Session Duration:", round(Session_Duration, 1),
    "<br>Clicks:", Click_Count,
    "<br>Load Time:", round(Load_Time, 2)
  ),
  hoverinfo = "text",
  marker = point_style
)

# Add the plot title and axis titles, then display the widget.
layout(
  engagement_scatter,
  title = "Interactive Engagement vs Scroll Depth by Site",
  xaxis = list(title = "Scroll Depth (%)"),
  yaxis = list(title = "Engagement Index")
)

5.2 Engagement Index by Conversion

# Box plots of Engagement_Index for each value of Converted. Converted is
# turned into a factor so each value gets its own box and fill colour.
ggplot(
  tech_sites,
  aes(
    x = as.factor(Converted),
    y = Engagement_Index,
    fill = as.factor(Converted)
  )
) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = c("#FF9999", "#66CC99")) +
  # Label the fill legend explicitly; otherwise its title is the raw
  # expression "as.factor(Converted)".
  labs(
    x = "Converted",
    y = "Engagement Index",
    fill = "Converted",
    title = "Engagement Index by Conversion"
  ) +
  theme_minimal()

5.3 Scroll Depth vs Session Duration

# Scatter of session duration against scroll depth, overlaid with a linear
# fit line drawn without a confidence band.
ggplot(
  data = tech_sites,
  mapping = aes(x = Scroll_Depth, y = Session_Duration)
) +
  geom_point(color = "#3399CC", alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  labs(
    title = "Scroll Depth vs Session Duration",
    x = "Scroll Depth (%)",
    y = "Session Duration (sec)"
  ) +
  theme_minimal()

5.4 Histogram of Page Load Time

# Histogram of Load_Time using 20 bins; white bar outlines separate
# neighbouring bars.
ggplot(data = tech_sites, mapping = aes(x = Load_Time)) +
  geom_histogram(bins = 20, fill = "#008080", color = "white") +
  labs(
    title = "Distribution of Page Load Time",
    x = "Load Time (sec)",
    y = "Frequency"
  ) +
  theme_light()

5.5 Engagement Index Density Plot

# Overlaid density curves of Engagement_Index, one per value of Converted.
# The fills are semi-transparent so the overlapping region stays visible.
ggplot(
  tech_sites,
  aes(x = Engagement_Index, fill = as.factor(Converted))
) +
  geom_density(alpha = 0.6) +
  scale_fill_manual(values = c("tomato", "darkgreen")) +
  # Label the fill legend explicitly; otherwise its title is the raw
  # expression "as.factor(Converted)".
  labs(
    title = "Engagement Index Density by Conversion",
    x = "Engagement Index",
    fill = "Converted"
  ) +
  theme_minimal()

6. Regression Analysis

# Linear model of Engagement_Index on the five other numeric site metrics.
model <- lm(
  formula = Engagement_Index ~ Session_Duration + Scroll_Depth + Click_Count +
    Load_Time + JS_Error_Count,
  data = tech_sites
)

# Print the residual summary, coefficient table and overall fit statistics.
summary(model)
## 
## Call:
## lm(formula = Engagement_Index ~ Session_Duration + Scroll_Depth + 
##     Click_Count + Load_Time + JS_Error_Count, data = tech_sites)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.85  -8.32   0.01   7.67  39.15 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      85.680102   6.650312  12.884   <2e-16 ***
## Session_Duration -0.007034   0.018485  -0.381    0.704    
## Scroll_Depth     -0.045006   0.045253  -0.995    0.322    
## Click_Count      -0.061988   0.430187  -0.144    0.886    
## Load_Time        -0.570009   1.666772  -0.342    0.733    
## JS_Error_Count    0.239775   0.897047   0.267    0.790    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.68 on 144 degrees of freedom
## Multiple R-squared:  0.008625,   Adjusted R-squared:  -0.0258 
## F-statistic: 0.2506 on 5 and 144 DF,  p-value: 0.939

7. Grouped Summary by Conversion

# Engagement summary (mean, median, SD) and mean load time for each value of
# Converted.
conversion_groups <- group_by(tech_sites, Converted)
engagement_by_conversion <- summarise(
  conversion_groups,
  Mean_Engagement = mean(Engagement_Index),
  Median_Engagement = median(Engagement_Index),
  SD_Engagement = sd(Engagement_Index),
  Mean_LoadTime = mean(Load_Time)
)

# Render the per-group summary as a striped table.
kable_styling(
  kable(engagement_by_conversion),
  bootstrap_options = "striped",
  full_width = FALSE
)
Converted Mean_Engagement Median_Engagement SD_Engagement Mean_LoadTime
0 81.45856 81.85608 12.65907 2.549729
1 76.52195 77.41908 11.42496 2.548918

8. Top 10 Converting Sites

# Keep only the converting sites (Converted == 1) and the four columns shown
# in the table.
converting_sites <- tech_sites %>%
  filter(Converted == 1) %>%
  select(Site, Engagement_Index, Session_Duration, Scroll_Depth)

# Rank them by Engagement_Index, highest first, and show the top ten rows as a
# striped table.
converting_sites %>%
  arrange(desc(Engagement_Index)) %>%
  head(n = 10) %>%
  kable() %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Site Engagement_Index Session_Duration Scroll_Depth
seek.com.au 102.24911 152.0562 32.86464
my.gov.au 101.26702 116.5373 60.21097
skyscanner.com.au 93.97687 141.2928 76.19873
zip.co/au 93.78135 178.4092 41.66658
drive.com.au 90.60854 235.8768 45.35376
airtasker.com.au 85.14580 185.2228 60.90739
maccas.com.au 82.33261 195.5930 33.85615
menulog.com.au 81.47204 180.3068 81.59948
commsec.com.au 81.35924 183.6138 55.11771
open.edu.au 80.81427 227.4619 32.51496

9. Export Summary Stats to CSV

# Mean session duration, click count and load time for each value of
# Converted.
summary_stats <- summarise(
  group_by(tech_sites, Converted),
  Mean_Session = mean(Session_Duration),
  Mean_Clicks = mean(Click_Count),
  Mean_Load = mean(Load_Time)
)

# Write the summary to a CSV file in the working directory, without a
# row-name column.
write.csv(
  x = summary_stats,
  file = "summary_by_conversion.csv",
  row.names = FALSE
)