This dashboard analyzes user behavior metrics from 150 popular tech and Australian websites. The focus is on identifying patterns in frontend performance that influence user engagement and conversion.
library(kableExtra)

# Read the per-site behaviour metrics from the CSV in the working directory.
tech_sites <- read.csv("tech_sites.csv")

# Render the whole dataset as a compact, left-aligned table, then wrap it in
# a 600px-high scroll box with the table header fixed (fixed_thead = TRUE).
sites_table <- kable_styling(
  kable(tech_sites),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE,
  position = "left"
)
scroll_box(sites_table, height = "600px", fixed_thead = TRUE)
| Site | Session_Duration | Scroll_Depth | Click_Count | Load_Time | JS_Error_Count | Engagement_Index | Converted |
|---|---|---|---|---|---|---|---|
| google.com.au | 209.80285 | 90.96691 | 6 | 2.5460931 | 2 | 81.06088 | 0 |
| youtube.com | 171.70414 | 82.39004 | 5 | 1.7302047 | 0 | 97.73036 | 0 |
| facebook.com | 218.86131 | 71.36253 | 8 | 3.0977601 | 0 | 66.29973 | 0 |
| amazon.com.au | 271.38179 | 26.73120 | 1 | 2.2037461 | 2 | 77.67609 | 1 |
| ebay.com.au | 165.95080 | 32.93030 | 3 | 1.5660509 | 1 | 71.39813 | 0 |
| abc.net.au | 165.95178 | 91.88434 | 4 | 2.2431309 | 3 | 57.60156 | 0 |
| news.com.au | 274.75277 | 68.51432 | 11 | 3.4004559 | 2 | 79.00783 | 0 |
| smh.com.au | 226.04608 | 20.73576 | 9 | 3.0101330 | 3 | 78.53903 | 1 |
| theage.com.au | 151.83154 | 28.11772 | 4 | 2.2908087 | 0 | 98.16140 | 0 |
| realestate.com.au | 212.55360 | 73.08014 | 8 | 2.2904454 | 2 | 87.56974 | 0 |
| domain.com.au | 152.19494 | 20.40493 | 7 | 2.3070190 | 0 | 67.70976 | 0 |
| seek.com.au | 152.05621 | 32.86464 | 5 | 3.7460488 | 0 | 102.24911 | 1 |
| indeed.com.au | 194.51774 | 63.89870 | 5 | 2.7291613 | 1 | 94.65240 | 0 |
| linkedin.com | 65.20319 | 75.35162 | 5 | 2.7580250 | 1 | 86.98517 | 0 |
| twitter.com | 76.50493 | 72.15690 | 9 | 3.1181701 | 1 | 77.28219 | 0 |
| instagram.com | 146.26275 | 37.94154 | 3 | 2.6432735 | 1 | 68.48673 | 0 |
| reddit.com | 119.23013 | 76.97434 | 3 | 2.3445747 | 1 | 75.53352 | 0 |
| bing.com | 198.85484 | 38.97993 | 9 | 2.3821901 | 3 | 93.06498 | 0 |
| yahoo.com | 125.51856 | 46.03198 | 5 | 2.4570392 | 1 | 102.61504 | 0 |
| microsoft.com | 95.26178 | 79.71931 | 9 | 2.4776667 | 4 | 98.51892 | 0 |
| office.com | 267.93893 | 71.97063 | 7 | 2.9365777 | 1 | 74.13381 | 0 |
| live.com | 166.45342 | 87.93787 | 7 | 2.5311675 | 2 | 66.56459 | 1 |
| netflix.com | 184.05169 | 72.60903 | 11 | 2.9395840 | 1 | 81.69064 | 0 |
| stan.com.au | 94.51511 | 65.46469 | 4 | 2.4515701 | 1 | 58.77873 | 1 |
| disneyplus.com | 147.33704 | 27.49398 | 8 | 2.5471811 | 2 | 83.87801 | 0 |
| spotify.com | 186.65536 | 49.41726 | 12 | 1.3010796 | 3 | 78.22877 | 0 |
| apple.com | 110.94039 | 41.21619 | 6 | 3.0497966 | 1 | 74.40756 | 0 |
| telstra.com | 202.54188 | 39.51917 | 8 | 2.7078931 | 4 | 60.86356 | 0 |
| optus.com.au | 143.96168 | 97.84084 | 5 | 3.0988061 | 1 | 86.16320 | 0 |
| commbank.com.au | 162.49838 | 51.44782 | 8 | 0.7622468 | 1 | 73.60759 | 0 |
| nab.com.au | 143.89760 | 91.36372 | 9 | 3.7530248 | 3 | 65.96100 | 0 |
| anz.com | 291.13669 | 70.49109 | 4 | 2.4162462 | 1 | 45.53285 | 1 |
| westpac.com.au | 179.19017 | 83.58490 | 3 | 3.1649097 | 1 | 79.66982 | 0 |
| my.gov.au | 116.53734 | 60.21097 | 6 | 1.8760564 | 2 | 101.26702 | 1 |
| servicesaustralia.gov.au | 229.35269 | 66.15231 | 2 | 2.8676643 | 1 | 99.93511 | 0 |
| ato.gov.au | 106.74938 | 59.40142 | 2 | 1.8679507 | 3 | 74.51484 | 0 |
| australiapost.com.au | 192.53182 | 35.61944 | 5 | 2.1257386 | 2 | 72.77346 | 0 |
| bom.gov.au | 62.41979 | 77.79617 | 5 | 3.6484188 | 1 | 85.62529 | 0 |
| healthdirect.gov.au | 100.30884 | 42.46179 | 5 | 2.3855906 | 4 | 68.01937 | 0 |
| catch.com.au | 191.81167 | 21.94528 | 5 | 2.6304597 | 2 | 83.62150 | 0 |
| kogan.com | 224.30799 | 71.63778 | 6 | 3.0220406 | 1 | 89.19296 | 0 |
| jbhifi.com.au | 190.28210 | 34.16885 | 4 | 2.7974091 | 3 | 94.72320 | 0 |
| officeworks.com.au | 173.06110 | 95.23669 | 4 | 2.5902513 | 1 | 78.79815 | 0 |
| woolworths.com.au | 161.93378 | 96.31429 | 8 | 2.7189766 | 4 | 77.55591 | 1 |
| coles.com.au | 91.28868 | 93.18915 | 5 | 3.9420494 | 0 | 69.46421 | 1 |
| bunnings.com.au | 136.80935 | 49.61270 | 4 | 2.4654287 | 1 | 70.07744 | 1 |
| target.com.au | 152.36167 | 21.23653 | 3 | 2.6206594 | 0 | 77.28225 | 1 |
| bigw.com.au | 243.42733 | 94.26548 | 6 | 3.1303926 | 1 | 84.40839 | 0 |
| myer.com.au | 200.61710 | 54.25473 | 9 | 3.1633156 | 0 | 90.96302 | 0 |
| davidjones.com | 74.21759 | 97.33239 | 6 | 3.2122182 | 0 | 70.36185 | 1 |
| chemistwarehouse.com.au | 199.44504 | 97.08960 | 4 | 2.8832381 | 3 | 97.91226 | 0 |
| priceline.com.au | 156.89506 | 88.24076 | 4 | 1.8141971 | 1 | 76.74652 | 0 |
| booktopia.com.au | 139.38468 | 43.55591 | 6 | 3.4800589 | 1 | 79.74359 | 0 |
| templeandwebster.com.au | 216.70058 | 50.80782 | 10 | 1.8121928 | 1 | 71.03346 | 0 |
| carsales.com.au | 241.85997 | 88.09093 | 6 | 2.6815813 | 2 | 50.90912 | 0 |
| drive.com.au | 235.87681 | 45.35376 | 6 | 2.0474345 | 0 | 90.60854 | 1 |
| gumtree.com.au | 129.64695 | 33.55942 | 8 | 2.4615170 | 2 | 88.84213 | 0 |
| carsguide.com.au | 161.44726 | 64.54410 | 8 | 2.6972574 | 1 | 76.62407 | 0 |
| webjet.com.au | 199.87581 | 94.89238 | 7 | 2.6928143 | 2 | 80.80389 | 1 |
| qantas.com | 238.53271 | 75.68238 | 5 | 2.7531525 | 0 | 86.19127 | 0 |
| virginaustralia.com | 151.24955 | 65.60489 | 5 | 3.4682268 | 0 | 61.24945 | 0 |
| jetstar.com | 168.86046 | 27.77412 | 8 | 2.7721206 | 1 | 73.65137 | 0 |
| booking.com | 113.61990 | 69.20058 | 7 | 2.3535060 | 1 | 89.53118 | 0 |
| airbnb.com.au | 108.22760 | 99.20431 | 6 | 3.0784523 | 4 | 64.94853 | 1 |
| tripadvisor.com | 228.75155 | 31.20672 | 11 | 3.2136823 | 2 | 83.52270 | 0 |
| expedia.com.au | 261.37440 | 61.46637 | 8 | 1.7634353 | 0 | 63.72102 | 1 |
| hotels.com | 175.67939 | 90.18985 | 2 | 2.8584400 | 2 | 85.59716 | 0 |
| wotif.com | 240.21197 | 79.26149 | 8 | 2.9207036 | 1 | 79.57230 | 1 |
| luxuryescapes.com | 201.69816 | 75.76126 | 4 | 2.3214619 | 3 | 60.61842 | 1 |
| skyscanner.com.au | 141.29281 | 76.19873 | 4 | 3.3254241 | 1 | 93.97687 | 1 |
| whirlpool.net.au | 201.68374 | 48.75929 | 5 | 2.4099666 | 0 | 71.18490 | 0 |
| ozbargain.com.au | 272.28219 | 43.48735 | 7 | 2.5753459 | 1 | 70.27697 | 1 |
| canva.com | 177.85044 | 84.74889 | 3 | 2.3961569 | 0 | 82.40683 | 0 |
| atlassian.com | 273.87862 | 84.80907 | 5 | 2.5093474 | 1 | 93.78365 | 0 |
| envato.com | 22.81529 | 89.36579 | 6 | 1.8422349 | 0 | 67.81014 | 1 |
| freelancer.com.au | 229.31415 | 93.05924 | 3 | 1.6359695 | 2 | 80.74016 | 0 |
| airtasker.com.au | 185.22282 | 60.90739 | 4 | 3.4567030 | 2 | 85.14580 | 1 |
| hipages.com.au | 162.05956 | 60.12130 | 2 | 1.9918232 | 2 | 88.31727 | 0 |
| comparethemarket.com.au | 185.50565 | 83.86361 | 4 | 1.9051646 | 0 | 82.11730 | 0 |
| finder.com.au | 60.74587 | 71.99711 | 7 | 1.2079659 | 1 | 75.59567 | 0 |
| canstar.com.au | 166.81969 | 76.15735 | 2 | 2.1166230 | 2 | 70.06892 | 0 |
| mozo.com.au | 201.42675 | 83.66341 | 2 | 1.7061461 | 1 | 81.03373 | 0 |
| choice.com.au | 268.67364 | 91.20043 | 8 | 3.4852091 | 5 | 67.13433 | 0 |
| sbs.com.au | 148.90379 | 47.03961 | 7 | 3.1058903 | 2 | 44.94379 | 0 |
| 7news.com.au | 131.49038 | 50.04664 | 6 | 2.0871098 | 1 | 85.23872 | 0 |
| 9news.com.au | 149.89458 | 27.51856 | 7 | 3.8514615 | 3 | 90.84722 | 0 |
| 10play.com.au | 234.92413 | 66.26241 | 3 | 3.0890593 | 3 | 51.64481 | 0 |
| abc.net.au/iview | 199.72507 | 22.87538 | 6 | 2.3051012 | 1 | 67.88323 | 0 |
| sbs.com.au/ondemand | 148.21439 | 57.24784 | 8 | 1.0003566 | 3 | 87.42985 | 0 |
| foxtel.com.au | 210.79605 | 63.41157 | 3 | 3.8745655 | 0 | 104.68995 | 0 |
| kayosports.com.au | 185.82465 | 42.92330 | 5 | 1.6662565 | 1 | 80.24952 | 1 |
| binge.com.au | 238.11870 | 67.26666 | 8 | 1.5127608 | 0 | 71.26396 | 1 |
| afl.com.au | 137.87681 | 22.44002 | 3 | 3.1135423 | 0 | 77.80524 | 0 |
| nrl.com | 160.34027 | 22.98786 | 5 | 3.9638514 | 3 | 96.49852 | 0 |
| cricket.com.au | 156.47351 | 85.80804 | 9 | 3.3305637 | 2 | 72.24843 | 1 |
| foxsports.com.au | 92.18910 | 48.81525 | 5 | 2.8383455 | 0 | 70.40970 | 0 |
| espn.com.au | 197.76722 | 30.16484 | 9 | 2.8568526 | 0 | 74.20708 | 0 |
| netball.com.au | 195.66332 | 61.77946 | 6 | 3.0120493 | 0 | 68.56006 | 0 |
| menulog.com.au | 180.30681 | 81.59948 | 8 | 2.9553572 | 2 | 81.47204 | 1 |
| ubereats.com | 165.92477 | 37.26568 | 6 | 2.6687149 | 0 | 99.49614 | 0 |
| deliveroo.com.au | 95.07776 | 69.83124 | 3 | 2.5625207 | 0 | 83.87695 | 0 |
| doordash.com.au | 154.76128 | 26.82780 | 5 | 2.4624441 | 2 | 76.97176 | 1 |
| hellofresh.com.au | 159.43713 | 24.13454 | 4 | 2.0476212 | 2 | 76.49826 | 1 |
| marleyspoon.com.au | 131.86336 | 62.50837 | 7 | 2.3315950 | 0 | 61.24171 | 0 |
| raywhite.com | 170.32286 | 63.25081 | 10 | 1.4842259 | 3 | 90.59732 | 0 |
| ljhooker.com.au | 204.24305 | 70.99439 | 4 | 2.4409962 | 0 | 79.06595 | 1 |
| rea-group.com | 293.17115 | 78.08731 | 1 | 1.9068453 | 3 | 77.83424 | 0 |
| macquarie.com.au | 190.47467 | 98.06817 | 7 | 1.8378464 | 3 | 118.31729 | 0 |
| ing.com.au | 195.45302 | 61.30403 | 9 | 2.6079365 | 1 | 83.58503 | 0 |
| paypal.com/au | 175.53325 | 45.83652 | 5 | 3.3352014 | 2 | 70.97851 | 1 |
| afterpay.com | 64.87373 | 83.61490 | 7 | 3.0509900 | 1 | 74.88371 | 0 |
| zip.co/au | 178.40917 | 41.66658 | 6 | 1.5576996 | 3 | 93.78135 | 1 |
| commsec.com.au | 183.61381 | 55.11771 | 6 | 1.9062231 | 0 | 81.35924 | 1 |
| cmcmarkets.com | 327.79453 | 26.27651 | 9 | 3.0644627 | 0 | 62.74066 | 0 |
| tpg.com.au | 168.45834 | 22.02806 | 5 | 1.9105076 | 2 | 91.03075 | 0 |
| vodafone.com.au | 198.09284 | 97.01187 | 10 | 2.3652201 | 4 | 71.98227 | 0 |
| monash.edu | 177.91729 | 86.87841 | 12 | 2.8300313 | 3 | 102.47957 | 0 |
| sydney.edu.au | 109.87932 | 75.67794 | 7 | 1.9189933 | 2 | 92.96058 | 0 |
| unimelb.edu.au | 248.56937 | 52.71624 | 1 | 2.5632253 | 3 | 74.63214 | 1 |
| unsw.edu.au | 225.11598 | 33.86355 | 8 | 1.6995847 | 0 | 95.37220 | 0 |
| open.edu.au | 227.46192 | 32.51496 | 8 | 2.1391794 | 0 | 80.81427 | 1 |
| tafensw.edu.au | 125.43675 | 40.01943 | 5 | 2.6918692 | 2 | 90.23328 | 0 |
| techradar.com | 264.16766 | 63.93813 | 7 | 1.5442038 | 3 | 85.81679 | 0 |
| gizmodo.com.au | 95.88894 | 77.16767 | 4 | 2.7642848 | 2 | 69.84372 | 0 |
| cnet.com | 215.21143 | 72.81579 | 7 | 2.4882173 | 3 | 72.27740 | 0 |
| theguardian.com/au | 311.42734 | 42.39471 | 4 | 2.8314940 | 1 | 92.35953 | 0 |
| dailytelegraph.com.au | 120.56782 | 96.38922 | 7 | 2.6343485 | 1 | 75.98270 | 0 |
| heraldsun.com.au | 146.02214 | 79.03175 | 5 | 3.3184843 | 1 | 75.15622 | 0 |
| bbc.com | 185.97908 | 64.34832 | 12 | 2.5751347 | 1 | 68.53853 | 0 |
| cnn.com | 149.79146 | 68.93766 | 5 | 2.2423567 | 2 | 85.08319 | 0 |
| wikipedia.org | 86.96019 | 53.56800 | 5 | 2.5733785 | 3 | 104.75030 | 0 |
| pinterest.com.au | 184.11378 | 39.81848 | 5 | 2.8259788 | 2 | 67.18961 | 0 |
| snapchat.com | 116.26178 | 48.47781 | 4 | 2.5293160 | 0 | 80.29063 | 1 |
| tiktok.com | 208.41555 | 80.62769 | 5 | 2.5243550 | 1 | 96.94665 | 0 |
| aldi.com.au | 124.83455 | 21.15148 | 7 | 2.0788050 | 1 | 79.04430 | 0 |
| costco.com.au | 272.99606 | 29.28581 | 4 | 2.1022594 | 2 | 85.42846 | 0 |
| ikea.com/au | 133.00480 | 23.68021 | 3 | 1.6584368 | 0 | 67.25128 | 0 |
| kmart.com.au | 160.67631 | 23.25830 | 4 | 3.5497460 | 3 | 85.13969 | 0 |
| danmurphys.com.au | 228.81103 | 88.43685 | 4 | 1.7536821 | 3 | 77.75427 | 0 |
| bws.com.au | 106.14814 | 76.29263 | 9 | 2.0842569 | 1 | 91.82876 | 0 |
| liquorland.com.au | 193.64760 | 57.93391 | 6 | 2.0689556 | 1 | 94.24863 | 0 |
| guzmanygomez.com | 258.42857 | 27.82673 | 5 | 3.0369546 | 1 | 111.07476 | 0 |
| dominos.com.au | 83.55101 | 59.32927 | 10 | 2.3230302 | 1 | 86.95560 | 0 |
| redrooster.com.au | 191.07803 | 57.87774 | 2 | 3.2486452 | 2 | 83.90956 | 0 |
| maccas.com.au | 195.59297 | 33.85615 | 9 | 2.0959056 | 0 | 82.33261 | 1 |
| subway.com.au | 226.90937 | 54.70813 | 11 | 2.6673965 | 2 | 75.76200 | 0 |
| coles.com.au/liquor | 105.78296 | 51.88038 | 7 | 1.9987918 | 3 | 84.06181 | 0 |
| myer.com.au/sale | 100.77260 | 69.26801 | 8 | 3.7870895 | 2 | 76.45518 | 0 |
| eventcinemas.com.au | 211.31649 | 70.80749 | 6 | 1.7874409 | 0 | 82.02153 | 0 |
| envato.com | 197.81908 | 23.62432 | 5 | 2.6858924 | 2 | 95.81117 | 0 |
This table displays the dataset which includes user behavior metrics like session duration, scroll depth, and conversions.
library(psych)
library(dplyr)
library(ggplot2)
library(plotly)
library(corrplot)
# Descriptive statistics (psych::describe) for every column of tech_sites,
# shown as a striped, hover-highlighted table that does not span full width.
descriptive_stats <- describe(tech_sites)
kable_styling(
  kable(descriptive_stats),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Site* | 1 | 150 | 74.853333 | 43.0491192 | 74.500000 | 74.816667 | 54.8562000 | 1.0000000 | 149.000000 | 148.000000 | 0.0092630 | -1.2178160 | 3.5149459 |
| Session_Duration | 2 | 150 | 175.060388 | 56.5405334 | 176.764915 | 174.496430 | 50.8403626 | 22.8152938 | 327.794527 | 304.979233 | 0.0467203 | -0.1802758 | 4.6165152 |
| Scroll_Depth | 3 | 150 | 60.149804 | 23.2372975 | 62.879590 | 60.381821 | 28.8385686 | 20.4049267 | 99.204308 | 78.799381 | -0.1188487 | -1.1814721 | 1.8973174 |
| Click_Count | 4 | 150 | 5.960000 | 2.4271396 | 6.000000 | 5.883333 | 2.9652000 | 1.0000000 | 12.000000 | 11.000000 | 0.3036916 | -0.3509675 | 0.1981751 |
| Load_Time | 5 | 150 | 2.549534 | 0.6244057 | 2.562873 | 2.547124 | 0.6733554 | 0.7622468 | 3.963851 | 3.201605 | -0.0554327 | -0.1735574 | 0.0509825 |
| JS_Error_Count | 6 | 150 | 1.480000 | 1.1742197 | 1.000000 | 1.408333 | 1.4826000 | 0.0000000 | 5.000000 | 5.000000 | 0.4676849 | -0.5383367 | 0.0958746 |
| Engagement_Index | 7 | 150 | 80.273776 | 12.5167799 | 79.706707 | 80.232230 | 12.4177511 | 44.9437942 | 118.317291 | 73.373497 | 0.0111105 | 0.3105759 | 1.0219908 |
| Converted | 8 | 150 | 0.240000 | 0.4285139 | 0.000000 | 0.175000 | 0.0000000 | 0.0000000 | 1.000000 | 1.000000 | 1.2054063 | -0.5505329 | 0.0349880 |
This table is a summary of the metrics recorded across all 150 websites, broken down for each behavior.
What This Table Shows (Broken Down Simply):
vars- The position (column number) of the variable in the dataset
n- Number of observations (websites) we’re studying (150 for all)
mean- The average value for that metric
sd (std. deviation)- How much people’s behavior varied from the average (higher = more spread out)
median- The middle value if we line everyone up from least to most
min & max- The smallest and biggest value anyone reached
range- The difference between min and max
mad- Median absolute deviation: like standard deviation, but measured from the median, so it is less affected by extreme values
trimmed- A version of the mean that ignores extreme highs/lows
skew- Shows if the data is lopsided (skewed right or left)
kurtosis- Tells if data is flat or spiky compared to normal (normal is ~0)
se (standard error)- How much the mean could vary if we repeated the experiment
Real-World Examples from Your Table:
Session_Duration: On average, people stayed 175 seconds, but some stayed as little as 22 seconds and others over 327 seconds!
Click_Count: People clicked about 6 times, ranging from 1 to 12.
Converted: Only 24% of people converted (i.e., completed the action like signup or purchase).
Load_Time: On average, pages took 2.55 seconds to load — not bad!
Engagement_Index: This is your custom score. It averages about 80 (observed values range from roughly 45 to 118, so the score is not capped at 100), which is a strong sign users are active.
“We compiled user behaviour data from 150 websites. On average, visitors clicked six times, stayed on a website for just under three minutes, and only around one in four converted. While engagement was usually good, the statistics also reveal that session duration and scroll depth varied greatly. These summaries help us understand the general behaviour of users before conducting a more thorough analysis.”
# Pairwise correlations between the six numeric behaviour metrics.
metric_cols <- c(
  "Session_Duration", "Scroll_Depth", "Click_Count",
  "Load_Time", "JS_Error_Count", "Engagement_Index"
)
cor_matrix <- cor(tech_sites[, metric_cols])

# Colour-coded upper triangle with each coefficient printed in black.
corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  tl.cex = 0.8,
  addCoef.col = "black"
)
This matrix shows how strongly each pair of metrics is correlated (e.g., scroll depth and engagement).
Let’s go row-by-row:
Session Duration vs Scroll Depth = -0.08 → Basically no relationship. Scrolling more doesn’t really mean users stay longer.
Click Count vs Engagement Index = -0.01 → Again, almost no connection. Just clicking more doesn’t mean users are more engaged.
Load Time vs Engagement Index = -0.03 → Small negative link — if pages load slower, engagement might slightly drop.
JS Error Count vs Engagement Index = 0.01 → Near zero. Errors don’t noticeably affect engagement in this dataset.
Overall? → Most relationships are weak or nonexistent. No strong patterns here.
This graph determines whether certain actions, such as clicking, scrolling, or loading time, are related to one another. The strength of that link is displayed in each square. The two behaviours rise together if the value is close to 1, travel in different directions if it is close to -1, and have no link at all if it is close to 0. Most of the values in our scenario are close to zero. Thus, we can conclude that there is no strong correlation, at least not in a predictable fashion, between any of these behaviours.
# Interactive scatter of Engagement_Index against Scroll_Depth, one marker
# per row of tech_sites. The hover label shows the site plus session
# duration, click count and load time.
point_style <- list(
  size = 10,
  color = "rgba(0,123,255,0.7)",
  line = list(color = "rgba(0,0,0,0.5)", width = 1)
)

engagement_scatter <- plot_ly(
  data = tech_sites,
  x = ~Scroll_Depth,
  y = ~Engagement_Index,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "Site:", Site,
    "<br>Session Duration:", round(Session_Duration, 1),
    "<br>Clicks:", Click_Count,
    "<br>Load Time:", round(Load_Time, 2)
  ),
  hoverinfo = "text",
  marker = point_style
)

layout(
  engagement_scatter,
  title = "Engagement vs Scroll Depth by Site",
  xaxis = list(title = "Scroll Depth (%)"),
  yaxis = list(title = "Engagement Index")
)
[
Show/Hide Explanation ]
“One website is represented by each dot here. We charted how far users scroll down the page (scroll depth) against their level of engagement. We would observe a pattern if scrolling increased user engagement, but none is seen here. Since the dots are scattered across the whole chart, we can conclude that a person is not necessarily more engaged if they scroll more. Perhaps other factors, such as load time, design, or content, are more important.”
# Box plot of Engagement_Index for non-converted (0) vs converted (1) rows.
# Converted is stored as 0/1, so it is turned into a factor to get two
# discrete groups on the x axis and two fill colours.
ggplot(
  tech_sites,
  aes(
    x = as.factor(Converted),
    y = Engagement_Index,
    fill = as.factor(Converted)
  )
) +
  geom_boxplot() +
  # Named values pin each colour to its level instead of relying on the
  # order in which the levels happen to appear.
  scale_fill_manual(values = c("0" = "#FF9999", "1" = "#66CC99")) +
  labs(
    x = "Converted (0 = No, 1 = Yes)",
    y = "Engagement Index",
    # Without this the legend is titled with the raw expression
    # "as.factor(Converted)".
    fill = "Converted",
    title = "Engagement by Conversion"
  ) +
  theme_minimal()
We split users into those who converted and those who did not, and compared their engagement scores to see whether higher engagement was associated with conversion. Unexpectedly, engagement scores were somewhat higher for non-converters. This suggests that engagement alone does not drive conversion; perhaps these users enjoyed exploring but were put off by something.
# Scatter of session duration against scroll depth with a straight
# least-squares trend line (method = "lm") drawn without a confidence band.
scroll_session_plot <- ggplot(
  data = tech_sites,
  mapping = aes(x = Scroll_Depth, y = Session_Duration)
)

scroll_session_plot +
  geom_point(color = "#3399CC", alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  labs(
    title = "Scroll Depth vs Session Duration",
    x = "Scroll Depth (%)",
    y = "Session Duration (sec)"
  ) +
  theme_minimal()
This graph checks whether there is a relationship between how long users stay on a website and how far they scroll. Every dot represents a website. The general trend is shown by the red line. As we can see, the line slopes slightly downward, indicating that scrolling further does not go along with staying longer. Therefore, in this dataset, scroll depth and session duration are not meaningfully related.
# Distribution of page load times, binned into 20 bars with white outlines.
load_time_plot <- ggplot(data = tech_sites, mapping = aes(x = Load_Time))

load_time_plot +
  geom_histogram(bins = 20, fill = "#008080", color = "white") +
  labs(
    title = "Page Load Time Distribution",
    x = "Load Time (sec)",
    y = "Count"
  ) +
  theme_light()
This graph illustrates how quickly webpages load. The majority of them load in two to three seconds, which is really typical. Very few are extremely slow or extremely fast. This is helpful since slow loading times might force users to abandon a page before viewing the information, which lowers engagement.
# Overlaid density curves of Engagement_Index for non-converted (0) and
# converted (1) rows; alpha keeps both shapes visible where they overlap.
ggplot(
  tech_sites,
  aes(x = Engagement_Index, fill = as.factor(Converted))
) +
  geom_density(alpha = 0.6) +
  # Named values pin each colour to its level instead of relying on the
  # order in which the levels happen to appear.
  scale_fill_manual(values = c("0" = "tomato", "1" = "darkgreen")) +
  labs(
    title = "Engagement Index Density",
    x = "Engagement Index",
    # The legend is the only key to the two groups here; without this it is
    # titled with the raw expression "as.factor(Converted)".
    fill = "Converted (0 = No, 1 = Yes)"
  ) +
  theme_minimal()
“This chart shows how engagement scores differ between users who converted and those who didn’t. The green shape shows where most converters are — they’re usually between 75 and 85 on the engagement scale. But the red shape shows that even users with very high engagement (like above 100) sometimes didn’t convert. That means high engagement doesn’t always guarantee a conversion — other things might be stopping them, like confusing buttons, bad layout, or no strong call-to-action.”
# Load required libraries
library(broom)
library(kableExtra)
library(car)

# Fit a linear regression of Engagement_Index on the five behaviour and
# performance metrics. `model` is reused later for the VIF check and the
# diagnostic plots.
model <- lm(
  Engagement_Index ~ Session_Duration + Scroll_Depth + Click_Count +
    Load_Time + JS_Error_Count,
  data = tech_sites
)

# Tidy and format regression results. Passing extra arguments to across()
# through `...` is deprecated as of dplyr 1.1.0, so the rounding is written
# as an explicit function of the column.
tidy(model) %>%
  mutate(across(where(is.numeric), function(x) round(x, 4))) %>%
  kable(
    col.names = c("Term", "Estimate", "Std. Error", "t Value", "P-Value"),
    caption = "Linear Regression Results: Predicting Engagement Index"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "left"
  )
| Term | Estimate | Std. Error | t Value | P-Value |
|---|---|---|---|---|
| (Intercept) | 85.6801 | 6.6503 | 12.8836 | 0.0000 |
| Session_Duration | -0.0070 | 0.0185 | -0.3805 | 0.7041 |
| Scroll_Depth | -0.0450 | 0.0453 | -0.9946 | 0.3216 |
| Click_Count | -0.0620 | 0.4302 | -0.1441 | 0.8856 |
| Load_Time | -0.5700 | 1.6668 | -0.3420 | 0.7329 |
| JS_Error_Count | 0.2398 | 0.8970 | 0.2673 | 0.7896 |
Let’s Break Down Each Column: Term: The thing we’re testing — like Session Duration, Scroll Depth, etc. Estimate: How much that thing changes the Engagement Index Std. Error: How accurate the estimate is (lower is better) t Value: A stat used to calculate significance (you can ignore this for now) P-Value: The most important column — tells us if the effect is real or not
How to Read It (In Simple English) • Intercept (85.6801) → This is like the starting point. If everything else is zero, engagement would be around 86. Now for the other rows: Term: What It Means in Plain Words Session_Duration: How long someone stays doesn’t really affect engagement (p = 0.70 → not significant) Scroll_Depth: More scrolling doesn’t mean more engagement (p = 0.32 → not significant) Click_Count: Clicking more also doesn’t predict engagement (p = 0.89 → not significant) Load_Time: Slow load time doesn’t lower engagement in a meaningful way (p = 0.73 → not significant) JS_Error_Count: More errors also didn’t affect engagement much (p = 0.79 → not significant)
Using typical user behaviours like time on page, scrolling, clicking, and even JavaScript error counts, we attempted to predict engagement. However, the results indicated that none of these factors had a statistically significant effect on engagement. In other words, engagement likely depends on factors that we did not measure, such as user emotion, layout, or content quality.
# Engagement centre and spread, plus mean load time, for each value of the
# Converted flag.
by_conversion <- group_by(tech_sites, Converted)
engagement_summary <- summarise(
  by_conversion,
  Mean_Engagement = mean(Engagement_Index),
  Median_Engagement = median(Engagement_Index),
  SD_Engagement = sd(Engagement_Index),
  Mean_LoadTime = mean(Load_Time)
)

kable_styling(
  kable(engagement_summary),
  bootstrap_options = "striped",
  full_width = FALSE
)
| Converted | Mean_Engagement | Median_Engagement | SD_Engagement | Mean_LoadTime |
|---|---|---|---|---|
| 0 | 81.45856 | 81.85608 | 12.65907 | 2.549729 |
| 1 | 76.52195 | 77.41908 | 11.42496 | 2.548918 |
“Those who converted and those who did not are contrasted in this table. Oddly, engagement scores were somewhat higher for non-converters. They may have been active and exploring, but they may not have discovered what they needed to convert. Speed was probably not the problem because load times were roughly the same for both groups. In general, conversion cannot be explained by engagement alone; instead, we may need to consider user intent, call-to-action clarity, or design.
# Ten converted rows (Converted == 1) with the highest Engagement_Index,
# showing only the site name and three headline metrics.
converted_sites <- filter(tech_sites, Converted == 1)
ranked_sites <- arrange(converted_sites, desc(Engagement_Index))
top_converters <- head(
  select(ranked_sites, Site, Engagement_Index, Session_Duration, Scroll_Depth),
  10
)

kable_styling(
  kable(top_converters),
  bootstrap_options = "striped",
  full_width = FALSE
)
| Site | Engagement_Index | Session_Duration | Scroll_Depth |
|---|---|---|---|
| seek.com.au | 102.24911 | 152.0562 | 32.86464 |
| my.gov.au | 101.26702 | 116.5373 | 60.21097 |
| skyscanner.com.au | 93.97687 | 141.2928 | 76.19873 |
| zip.co/au | 93.78135 | 178.4092 | 41.66658 |
| drive.com.au | 90.60854 | 235.8768 | 45.35376 |
| airtasker.com.au | 85.14580 | 185.2228 | 60.90739 |
| maccas.com.au | 82.33261 | 195.5930 | 33.85615 |
| menulog.com.au | 81.47204 | 180.3068 | 81.59948 |
| commsec.com.au | 81.35924 | 183.6138 | 55.11771 |
| open.edu.au | 80.81427 | 227.4619 | 32.51496 |
Listed here are the ten converting websites with the highest engagement scores, along with their session duration and scroll depth. It is interesting to see that the patterns vary; some websites retain visitors for longer, while others convert users quickly with minimal scrolling. This demonstrates that there is no one-size-fits-all approach; instead, every website has a unique approach that works.
# Mean session duration, click count and load time for each value of the
# Converted flag.
summary_stats <- summarise(
  group_by(tech_sites, Converted),
  Mean_Session = mean(Session_Duration),
  Mean_Clicks = mean(Click_Count),
  Mean_Load = mean(Load_Time)
)

# Save the summary next to the report (no row-name column in the file).
write.csv(summary_stats, file = "summary_by_conversion.csv", row.names = FALSE)

# Show the same summary in the document.
kable_styling(
  kable(summary_stats),
  bootstrap_options = "striped",
  full_width = FALSE
)
| Converted | Mean_Session | Mean_Clicks | Mean_Load |
|---|---|---|---|
| 0 | 174.1301 | 6.052632 | 2.549729 |
| 1 | 178.0062 | 5.666667 | 2.548918 |
We examined the behaviour of users according to whether or not they converted. It is interesting to note that converters stayed somewhat longer but clicked less. This suggests that those who convert may be more focused, finding what they need and acting without wasting clicks. Page speed did not really matter; it was essentially the same for both groups.
library(broom)
library(kableExtra)

# Run ANOVA model: one-way comparison of Engagement_Index between the two
# levels of Converted (stored as 0/1, hence the factor conversion).
anova_model <- aov(Engagement_Index ~ as.factor(Converted), data = tech_sites)

# Tidy and display as clean table. Passing extra arguments to across()
# through `...` is deprecated as of dplyr 1.1.0, so the rounding is written
# as an explicit function of the column.
tidy(anova_model) %>%
  mutate(across(where(is.numeric), function(x) round(x, 4))) %>%
  kable(
    col.names = c("Source", "DF", "Sum Sq", "Mean Sq", "F Value", "P-Value"),
    caption = "ANOVA Test: Difference in Engagement Between Converted and Non-Converted Users"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "left"
  )
| Source | DF | Sum Sq | Mean Sq | F Value | P-Value |
|---|---|---|---|---|---|
| as.factor(Converted) | 1 | 666.7667 | 666.7667 | 4.3516 | 0.0387 |
| Residuals | 148 | 22677.0305 | 153.2232 | NA | NA |
What Each Column Means:
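DF: Degrees of freedom — for the group row, the number of groups minus one (2 − 1 = 1 here); for the Residuals row, the number of observations minus the number of groups (150 − 2 = 148)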
Sum Sq: How much total variation we’re seeing
Mean Sq: Average variation for that group
F Value: A score that tells how different the groups are
P-Value: The most important — tells us if the difference is real or just by chance
To determine whether engagement differs between converters and non-converters, we employed a statistical test (one-way ANOVA). We obtained a p-value of 0.0387 from the result, which is less than the conventional cutoff of 0.05. This indicates that the difference in engagement is statistically significant and unlikely to be due to chance alone. Therefore, we can state that, at least in terms of engagement, those who convert differ from those who do not.
library(car)

# Variance inflation factors for the predictors of the regression `model`.
vif(mod = model)
## Session_Duration Scroll_Depth Click_Count Load_Time
## 1.012740 1.025174 1.010750 1.004212
## JS_Error_Count
## 1.028652
[
Show/Hide Explanation ]
We made sure that our input variables were not expressing the same thing before creating a predictive model. We refer to this as a multicollinearity check. Since all of the VIF values were near to 1, each variable is distinct and valuable. That reassures us that our model would not become confused.
# Draw the diagnostic plots for the regression `model` in a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so the
# grid layout does not leak into plots drawn later on the same device.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)
What’s a Residual? A residual is the difference between what our model predicted and what actually happened. Example: If the model says a user engagement should be 85, but they actually scored 90, then the residual is +5.
These plots help us check how well our model is behaving.
What it shows: Are errors randomly spread?
What we want: A cloud of dots scattered evenly around the horizontal line (0).
What we see: ✅ The dots are spread randomly — no clear curve, so this is good.
Say This:
“There’s no obvious pattern here, which means our model’s errors are random. That’s a good sign — the model isn’t biased.”
What it shows: Do errors follow a normal distribution (bell curve)?
What we want: Dots should follow the diagonal line.
What we see: Dots mostly follow the line, except for a couple of points on the ends (outliers).
Say This:
“This tells us the errors mostly follow a normal pattern, which means our model assumptions are holding up well.”
What we want: A flat red line with dots scattered evenly.
What we see: It is mostly flat — no funnel or wave shape.
Say This:
“The errors are consistently spread across predictions, which means the model works similarly for low and high engagement users.”
What we want: No dots very far from others.
What we see: Most dots are safe. Only a couple (like 108 and 32) stand out.
Say This:
“Most data points are fine, with just a couple of slightly influential ones. Nothing extreme, so no major concerns here.”
Overall Conclusion: “These plots confirm our model is pretty stable. The errors are random, normally distributed, and not driven by any extreme values. This gives us confidence that the model is statistically sound.”
library(plotly)

# Numeric behaviour metrics that feed the PCA (also reused for clustering).
pca_inputs <- c(
  "Session_Duration", "Scroll_Depth", "Click_Count",
  "Load_Time", "JS_Error_Count", "Engagement_Index"
)
numeric_vars <- tech_sites[, pca_inputs]

# PCA with scale. = TRUE, so each variable is scaled before rotation.
pca_result <- prcomp(numeric_vars, scale. = TRUE)
pca_df <- as.data.frame(pca_result$x)

# Label every row with its site name, falling back to the row names when
# the data has no Site column.
has_site_column <- "Site" %in% colnames(tech_sites)
if (has_site_column) {
  pca_df$Site <- tech_sites$Site
} else {
  pca_df$Site <- rownames(tech_sites)
}

# Interactive scatter of the first two principal components.
pca_marker <- list(
  size = 8,
  color = "rgba(0,123,255,0.7)",
  line = list(width = 1, color = "rgba(0,0,0,0.5)")
)
pca_scatter <- plot_ly(
  pca_df,
  x = ~PC1,
  y = ~PC2,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    "Site:", Site,
    "<br>PC1:", round(PC1, 2),
    "<br>PC2:", round(PC2, 2)
  ),
  marker = pca_marker
)

layout(
  pca_scatter,
  title = "Interactive PCA: PC1 vs PC2",
  xaxis = list(title = "Principal Component 1"),
  yaxis = list(title = "Principal Component 2")
)
[
Show/Hide Explanation ]
Similar to condensing numerous columns into two significant directions, this graphic distils complex behaviour data into two main patterns. Based on the combined user behaviours, each dot displays a website. The dispersed dots indicate that user involvement differs greatly between websites, and we can utilise this information to group or cluster websites that exhibit comparable trends.
set.seed(123)

# Distances between rows of the scaled metrics, then agglomerative
# clustering with the "ward.D2" linkage.
scaled_metrics <- scale(numeric_vars)
dist_matrix <- dist(scaled_metrics)
cluster_result <- hclust(d = dist_matrix, method = "ward.D2")

# Dendrogram without leaf labels.
plot(
  cluster_result,
  main = "Hierarchical Clustering of Sites",
  labels = FALSE
)
This graph classifies websites according to user behaviour. Websites that connect lower down are more comparable; perhaps users click about similarly, remain for the same amount of time, or scroll in the same manner. The groups become increasingly distinct as we ascend. In order to create tidy clusters, such as three to five groups of websites with comparable engagement patterns, we can draw a horizontal line across this figure.