library(tidyverse)
library(kableExtra)
library(tidyverse)
library(dplyr) # for data manipulation
library(lubridate) # for date-time operations
library(ggplot2) # for data visualization
library(readr) # for reading CSV files
# Load the raw traffic log once; every later object derives from it.
Data <- read_csv("planning_alerts_data.csv")

# Parse the timestamp and derive the hour/day/week/month grouping columns.
# The parsed timestamp replaces the original `tfc_stamped` column.
data_renamed <- Data %>%
  mutate(
    tfc_stamped_dt = ymd_hms(tfc_stamped),
    hour = hour(tfc_stamped_dt),
    day = date(tfc_stamped_dt),
    week = week(tfc_stamped_dt),
    month = month(tfc_stamped_dt, label = TRUE)
  ) %>%
  select(
    tfc_id, tfc_stamped_dt, tfc_cookie:tfc_referrer,
    hour, day, week, month
  ) %>%
  rename(tfc_stamped = tfc_stamped_dt)

# Distinct cookies per hour/day/week/month/device combination.
user_counts <- data_renamed %>%
  group_by(hour, day, week, month, tfc_device_type) %>% # Group
  summarise(
    unique_users = n_distinct(tfc_cookie), # Count unique users
    .groups = "drop"
  )

# Visualization of Unique Users by Month and Device Type
# NOTE(review): user_counts has one row per hour, so several rows share each
# month/device bar and are drawn on top of each other with position = "dodge".
# Confirm whether a month-level n_distinct() was intended.
ggplot(
  user_counts,
  aes(x = month, y = unique_users, fill = tfc_device_type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Number of Users by Month") +
  xlab("Month") +
  ylab("Unique Users") +
  scale_fill_manual(values = c(
    "Android App" = "antiquewhite",
    "Desktop" = "antiquewhite4",
    "iPhone App" = "burlywood",
    "Mobile (browser)" = "lightgoldenrod1",
    "Tablet (browser)" = "lavenderblush2"
  )) +
  theme_bw() +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
Planning Alerts Project ie
INTRODUCTION
This report is created for PlanningAlerts.ie, a service that allows users to subscribe for notifications about Irish planning applications that may affect them. The aim of this report is to analyze the website usage data to uncover key insights into user behavior, website performance, and engagement patterns. By exploring various metrics related to user interactions with the website, this analysis helps to better understand how visitors engage with the site and how different factors, such as device type and time intervals, influence their behavior.
1. User and Session Analysis
a.Unique User by month
Monthly Trends: Peak user activity occurs in July, with notable engagement continuing into August.
Desktop Dominance: Desktop usage leads in July and August, suggesting preference for detailed searches and planning applications, which benefit from larger screens.
Mobile Growth: Mobile browser usage shows steady growth, particularly in August, reflecting increased on-the-go engagement.
Tablet and App Usage: Tablet and iPhone app usage remain consistently low with minimal month-to-month variation, indicating limited adoption.
Seasonal Insights: High activity during summer suggests heightened interest in planning approvals, property investments, or related activities during this period.
b.Unique Session by month
Overall Activity: July records the highest number of sessions, reflecting a peak in user engagement.
Desktop Usage: Desktops remain dominant; it is where the engagement is highest, especially during July, followed by August. This suggests that desktops are still a go-to device when more detailed planning activities are necessary, even in the summer.
Mobile Browser Growth: Mobile browser sessions have grown steadily from month to month, showing consistent mobile engagement over this period.
Android App Usage: Android app activity is highest in June, dropping significantly in subsequent months, suggesting reduced app-based interactions as the season progresses.
Tablet and iPhone App Usage: Both tablet browsers and the iPhone app usage show consistent low usage across all months, indicating a minor reliance of users on these platforms.
Seasonal Trends: The peak in July probably reflects summer interest in planning and property activities; desktop dominance suggests that users prefer desktops for detailed research or applications.
# Distinct sessions per hour/day/week/month/device combination.
Sessions_count <- data_renamed %>%
  group_by(hour, day, week, month, tfc_device_type) %>%
  summarise(total_session = n_distinct(tfc_session), .groups = "drop")

# Visualization of Total Sessions by Month and Device Type ----
ggplot(
  Sessions_count,
  aes(x = month, y = total_session, fill = tfc_device_type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Total Sessions by Month and Device Type") +
  xlab("Month") +
  # The y axis shows session counts, so label it as such.
  ylab("Total Sessions") +
  scale_fill_manual(values = c(
    "Android App" = "antiquewhite",
    "Desktop" = "antiquewhite4",
    "iPhone App" = "burlywood",
    "Mobile (browser)" = "lightgoldenrod1",
    "Tablet (browser)" = "gray50"
  )) +
  theme_bw() +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
c.Count unique users referred to website by Google
The following are the user data on the website, differentiated between those who accessed through Google and the ones who did not.
Non-Google users (No): The total users are higher compared to unique users. This shows repetition in visits.
Google users (Yes): Although there are fewer total users, unique users make up a higher proportion of them, indicating that Google visitors are more likely to be unique or first-time users.
# Flag each row by whether its referrer mentions Google (case-insensitive).
# Rows with a missing referrer fall through to "No".
Data <- mutate(
  Data,
  google_referrer = case_when(
    str_detect(tfc_referrer, regex("google", ignore_case = TRUE)) ~ "Yes",
    TRUE ~ "No"
  )
)

google_referred_users <- Data %>%
  group_by(google_referrer) %>% # Group Google referred
  summarise(
    number_users = n(), # Number of rows (page views) per referrer group
    number_unique_users = n_distinct(tfc_cookie) # Distinct cookies per group
  )

# Print the summary table.
google_referred_users
# A tibble: 2 × 3
google_referrer number_users number_unique_users
<chr> <int> <int>
1 No 327793 154618
2 Yes 72422 46216
# Styled table of traffic split by Google / non-Google referrer.
knitr::kable(
  google_referred_users,
  digits = 0,
  align = "lrr",
  col.names = c("From Google", "Total users", "Unique users"),
  caption = "Traffic from Google"
) %>%
  kable_styling(full_width = FALSE) %>%
  row_spec(0, bold = TRUE, color = "dodgerblue2", background = "cornsilk3") %>%
  column_spec(
    1,
    italic = TRUE, bold = TRUE,
    color = "dodgerblue2", background = "cornsilk3"
  ) %>%
  column_spec(2, color = "dodgerblue2", background = "bisque") %>%
  column_spec(3, color = "dodgerblue2", background = "bisque")
From Google | Total users | Unique users |
---|---|---|
No | 327793 | 154618 |
Yes | 72422 | 46216 |
d.Average session length
The average session length is 2193 sec, or about 36.5 minutes, which suggests that users spend quite a lot of time on the site, likely due to detailed browsing or research. This is a good sign for engagement, but it could also be the result of complex navigation or lengthy searches, which may need simplification or optimization.
# Duration of each session = time between its first and last recorded visit,
# then averaged over all sessions.
# NOTE(review): session_length is computed in minutes here (units = "mins");
# confirm the reported average is interpreted in the same unit.
session_duration <- data_renamed %>%
  group_by(tfc_session) %>%
  summarise(
    session_start = min(tfc_stamped),
    session_end = max(tfc_stamped)
  ) %>%
  mutate(
    session_length = as.numeric(
      difftime(session_end, session_start, units = "mins")
    )
  ) %>%
  summarise(avg_session_length = mean(session_length))

# Print the one-row result.
session_duration
# A tibble: 1 × 1
avg_session_length
<dbl>
1 2193.
# Double check this
# Styled one-cell table showing the average session length.
knitr::kable(
  session_duration,
  digits = 0,
  align = "c",
  col.names = c("session_duration")
) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(
    1,
    underline = FALSE, bold = TRUE,
    color = "dodgerblue2", background = "cornsilk3"
  )
session_duration |
---|
2193 |
e.Average session length by device type
According to the bar graph, the average session duration for iPhone users is the longest, followed by mobile browser users. Third place goes to desktop sessions, fourth place goes to tablet users, and the shortest session duration goes to Android app users.
Due to the improved user experience, quicker surfing, or interesting material, iPhone users probably spend more time. Sessions for Android apps are the shortest, which could indicate software restrictions or a less interesting user experience.
This data indicates that enhancing the user experience, particularly on Android tablets and applications, may increase engagement on all platforms.
# This will show if session duration differs by device type.
# Session length (minutes) per session/device pair, averaged per device type.
session_by_device <- data_renamed %>%
  group_by(tfc_session, tfc_device_type) %>%
  summarise(
    session_start = min(tfc_stamped),
    session_end = max(tfc_stamped),
    .groups = "drop"
  ) %>%
  mutate(
    session_length = as.numeric(
      difftime(session_end, session_start, units = "mins")
    )
  ) %>%
  group_by(tfc_device_type) %>%
  summarise(avg_session_length_device = mean(session_length))

# Visualization of Average Session Length by Device Type
# Fill colours are named so each device keeps the same colour regardless of
# how the device labels happen to sort.
ggplot(
  session_by_device,
  aes(
    x = tfc_device_type,
    y = avg_session_length_device,
    fill = tfc_device_type
  )
) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  scale_fill_manual(values = c(
    "Android App" = "antiquewhite",
    "Desktop" = "antiquewhite4",
    "iPhone App" = "burlywood",
    "Mobile (browser)" = "lightgoldenrod1",
    "Tablet (browser)" = "gray50"
  )) +
  labs(
    title = "Average Session Length by Device Type (Minutes)",
    x = "Device Type",
    y = "Average Session Length (Minutes)"
  ) +
  theme_bw() +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
f.Bounce Rate (Sessions with Only One Page View)
A high bounce rate for Planning Alerts could be due to irrelevant content, slow page load times, or poor user experience, causing users to leave quickly. It could also be the result of unclear navigation or of attracting an irrelevant audience.
The bounce rate can be decreased by improving user flow, site speed, and content.
# A bounce occurs when a user views only one page in a session. This metric is
# critical for understanding engagement.
bounce_rate <- data_renamed %>%
  group_by(tfc_session) %>% # one group per session
  summarise(pages_visited = n_distinct(tfc_full_url)) %>% # distinct URLs seen
  summarise(
    bounce_sessions = sum(pages_visited == 1), # sessions with a single page
    total_sessions = n(), # all sessions
    bounce_rate_percentage = (bounce_sessions / total_sessions) * 100
  )

# Bounce and non-bounce percentages, taken from the computed rate so the chart
# always matches the data rather than a hand-copied constant.
bounce_percentage_data <- data.frame(
  Category = c("Bounce", "Non-Bounce"),
  Percentage = c(
    bounce_rate$bounce_rate_percentage,
    100 - bounce_rate$bounce_rate_percentage
  )
)

# Plot a pie chart
ggplot(bounce_percentage_data, aes(x = "", y = Percentage, fill = Category)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y") +
  labs(title = "Bounce Rate Percentage") +
  scale_fill_manual(
    values = c("Bounce" = "antiquewhite4", "Non-Bounce" = "burlywood")
  ) +
  theme_void() +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
2. Visitor Segmentation and Behavior
a.Frequency of User Visits - Once-Off vs Repeat Visitors
Many individuals may visit Planning Alerts for a single, specific need, as seen in the data showing a low number of repeat visitors and a high number of once-off users. This implies a lack of continuous engagement or low user retention.
Focusing on user retention tactics such as frequent updates and customized alerts could promote more frequent usage and boost repeat visits.
# Label each visitor (cookie) as once-off or repeat from their session count.
User_sessions <- data_renamed %>%
  group_by(tfc_cookie) %>%
  summarise(sessions_per_user = n_distinct(tfc_session)) %>%
  mutate(
    visitors_type = ifelse(sessions_per_user == 1, "once-off", "Repeat")
  )

# Calculate the proportions
user_sessions_summary <- User_sessions %>%
  count(visitors_type) %>%
  mutate(percentage = n / sum(n) * 100)

# Plot as donut chart: a stacked column in polar coordinates, with xlim()
# leaving an empty band on the inside to form the hole.
ggplot(user_sessions_summary, aes(x = 2, y = n, fill = visitors_type)) +
  geom_col(width = 0.5, color = "white") +
  coord_polar(theta = "y") +
  xlim(1, 2.5) +
  theme_void() +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white",
    fontface = "bold"
  ) +
  scale_fill_manual(values = c("antiquewhite4", "burlywood")) +
  labs(title = "Once-Off vs Repeat Visitors", fill = "Visitor Type") + # Label
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10)
  )
b.Average Pages Clicked per Session
Users are only looking at a small number of pages during their visits, as indicated by the average of 1.41 pages per session. This could mean that users are finding the information they need quickly, but it could also mean that they are not exploring the site very much.
The low number of pages each session may indicate areas where user experience and content visibility might be improved.
# Rows (page views) per session, averaged over all sessions.
Average_pages_per_session <- data_renamed %>%
  group_by(tfc_session) %>%
  summarise(pages_per_session = n()) %>%
  summarise(avg_pages = mean(pages_per_session))

# Print the one-row result.
Average_pages_per_session
# A tibble: 1 × 1
avg_pages
<dbl>
1 1.41
# Styled one-cell table of the average pages per session.
# Two decimals are kept: rounding to 0 digits would display the average as 1
# and hide the fractional part.
knitr::kable(
  Average_pages_per_session,
  digits = 2,
  align = "c",
  col.names = c("Average pages per session")
) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(
    1,
    underline = FALSE, bold = TRUE,
    color = "dodgerblue2", background = "cornsilk3"
  )
Average pages per session |
---|
1 |
c.Average Number of Pages Clicked per User
An average of 1.14 pages per user suggests limited engagement, with users likely finding what they need quickly. This might point to one-time visits for specific information.
Enhancing content or providing suggestions to promote deeper exploration are two ways to make it better.
# We will calculate the number of distinct pages visited by each user (cookie)
# and then average that count over all users.
pages_per_user <- data_renamed %>%
  group_by(tfc_cookie) %>%
  summarise(pages_visited = n_distinct(tfc_full_url)) %>%
  summarise(avg_pages_per_user = mean(pages_visited))

# Print the one-row result.
pages_per_user
# A tibble: 1 × 1
avg_pages_per_user
<dbl>
1 1.14
# Styled one-cell table of the average pages per user.
# Two decimals are kept: rounding to 0 digits would display the average as 1
# and hide the fractional part.
knitr::kable(
  pages_per_user,
  digits = 2,
  align = "c",
  col.names = c("Average pages per user")
) %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(
    1,
    underline = FALSE, bold = TRUE,
    color = "dodgerblue2", background = "cornsilk3"
  )
Average pages per user |
---|
1 |
3. User Journey Analysis
a.Common User Journeys on the Website
- Web Pages (400,215): This is the total number of page-view records in the dataset.
- Unique Users (189,028): This indicates that during the specified time frame, 189,028 unique users visited the website.
- Unique Sessions (283,639): This means that the website had 283,639 distinct user visits, or sessions.
This implies that users are actively browsing the website’s available pages and that it receives consistent traffic. However, enhancing the experience may be possible with user journey optimization.
# Headline totals for the whole dataset.
Top_level_stats <- Data %>%
  summarise(
    number_webpages = n(), # n() counts the number of rows
    number_unique_users = n_distinct(tfc_cookie), # distinct cookies
    number_unique_sessions = n_distinct(tfc_session) # distinct sessions
  )

# Print the one-row result.
Top_level_stats
# A tibble: 1 × 3
number_webpages number_unique_users number_unique_sessions
<int> <int> <int>
1 400215 189028 283639
# Styled table of the headline totals.
knitr::kable(
  Top_level_stats,
  digits = 0,
  align = "ccc",
  col.names = c("No.Web Pages", "Unique Users", "Unique Sessions"),
  caption = " USER JOURNEYS ON THE WEBSITE "
) %>%
  kable_styling(full_width = FALSE) %>%
  row_spec(0, bold = TRUE, color = "gray28", background = "cornsilk3")
No.Web Pages | Unique Users | Unique Sessions |
---|---|---|
400215 | 189028 | 283639 |
b.Exit Pages (Where Users End Their Session)
The “About” page has higher exits. This implies that users might be rapidly losing interest after reading through the company’s basic details. Likewise, a high blog page exit rate may suggest that readers are not finding the content interesting enough to continue exploring or take the intended activities.
Both of these patterns might point to ways to improve the relevancy of the material or promote more in-depth interaction with other website elements.
# Exit pages indicate the last page users viewed before leaving. This can
# identify areas of friction.
# The last page is chosen by timestamp rather than by row position, so the
# result does not depend on how the input file happens to be ordered.
exit_pages <- data_renamed %>%
  group_by(tfc_session) %>%
  summarise(exit_page = last(tfc_full_url, order_by = tfc_stamped)) %>%
  group_by(exit_page) %>%
  summarise(exit_count = n())

# Visualization of Top Exit Pages
# The summary is ordered by page URL, so the ten pages are selected by exit
# count rather than by taking the first ten rows. Re-check any narrative that
# was written against the earlier chart after re-running.
ggplot(
  slice_max(exit_pages, exit_count, n = 10, with_ties = FALSE),
  aes(x = reorder(exit_page, -exit_count), y = exit_count)
) +
  geom_bar(stat = "identity", fill = "cornsilk3") +
  coord_flip() +
  labs(
    title = "Top 10 Exit Pages",
    x = "Exit Page",
    y = "Exit Count"
  ) +
  theme_light()
c.Which Types of pages are common entry points for unique users
Users are most interested in initial interactions through app-based entrance, as indicated by the top entry points, which reveal that users interact with the application first, then the mobile app.
The “map” and “list” pages imply that people are actively looking through particular material or data based on location.
The “signup” page has a high ranking, suggesting that there is a strong need for more individualized features or experiences as well as a strong interest in user registration.
# Distinct users (cookies) per screen type.
# NOTE(review): this counts users over all page views of each screen type,
# not only each user's first page -- confirm this matches the intended
# definition of an "entry point".
landing_pages <- data_renamed %>%
  group_by(tfc_full_url_screen) %>%
  summarise(user_count = n_distinct(tfc_cookie))

library(treemap)

# Treemap with tile area and colour both driven by the user count.
treemap(
  landing_pages,
  index = "tfc_full_url_screen",
  vSize = "user_count",
  vColor = "user_count",
  type = "value",
  palette = "RdYlBu",
  title = "Landing Pages and Entry Points",
  border.col = "white"
)
4. Cross-Device and Multi-Page Analysis
a.Check how many users use multiple devices
It is likely that relatively few users are transferring between devices throughout their sessions if the cross-device user count is less than 1. This would suggest that the majority of users are using a single device to interact with the platform, most likely out of convenience or personal preference.
# Users (cookies) seen on more than one device type.
cross_device_users <- data_renamed %>%
  group_by(tfc_cookie) %>%
  summarise(device_count = n_distinct(tfc_device_type)) %>%
  filter(device_count > 1)

# Only plot when there is something to show: with zero rows, max() returns
# -Inf and the seq() call for the axis breaks would fail.
if (nrow(cross_device_users) > 0) {
  print(
    ggplot(cross_device_users, aes(x = tfc_cookie, y = device_count)) +
      geom_tile(aes(fill = device_count), color = "white") +
      scale_fill_viridis_c() +
      # Display only whole numbers
      scale_y_continuous(
        breaks = seq(0, max(cross_device_users$device_count), by = 1)
      ) +
      labs(
        title = "Device Count by User",
        x = "User ID (tfc_cookie)",
        y = "Device Count"
      ) +
      theme(axis.text.x = element_text(angle = 90, hjust = 1))
  )
} else {
  message("No users were seen on more than one device type; plot skipped.")
}
5. Page-Specific Engagement
a.Count the number of views for planning applications by device type
According to the data, desktop users dominate the platform with the most views, suggesting that people prefer to visit the website on larger screens—possibly for intricate interactions or in-depth research.
Use of mobile browsers is also rather high, indicating a need for portable access. The moderate number of views for Android apps indicates that they are used, but not widely.
Low views for apps on tablets and iPhones indicate less use of these devices.
This may indicate that optimization is required to increase engagement on underutilized devices.
# Rows that carry a planning-application reference, counted per device type.
App_views <- data_renamed %>%
  filter(!is.na(tfc_application_reference)) %>%
  group_by(tfc_device_type) %>%
  summarise(app_views_count = n())

# Styled table; every body row gets the same formatting, however many device
# types are present in the data.
knitr::kable(
  App_views,
  align = "cc",
  col.names = c("Device", "View count"),
  caption = " VIEWS BY DEVICE TYPE "
) %>%
  kable_styling(full_width = FALSE) %>%
  row_spec(0, bold = TRUE, color = "cornsilk3", background = "gray2") %>%
  row_spec(
    seq_len(nrow(App_views)),
    bold = TRUE, color = "gray28", background = "cornsilk3"
  )
Device | View count |
---|---|
Android App | 21162 |
Desktop | 191820 |
Mobile (browser) | 56510 |
Tablet (browser) | 1879 |
iPhone App | 890 |
CONCLUSION
In conclusion, the majority of PlanningAlerts.ie visitors are desktop users, with activity peaking in summer. Although the app is not extensively used, mobile usage is increasing. New users are acquired through Google referrals. On average, users stay on the site for 36.5 minutes; however, low page visits and high bounce rates indicate that the site needs to be improved. Concentrating on mobile visitors and providing better content may decrease bounce rates and encourage repeat visits.
* END OF THE DOCUMENT *