Looking at MSBA page view summary, converting data types
# Inspect the raw column types of the page-level summary before conversion.
str(summary)
## 'data.frame': 11 obs. of 8 variables:
## $ Page : chr "/msba/" "/msba/admissions/" "/msba/courses/" "/msba/faq/" ...
## $ Pageviews : chr "28,767" "13,883" "8,810" "5,332" ...
## $ Unique Pageviews : chr "19,361" "10,256" "6,563" "4,358" ...
## $ Avg. Time on Page: chr "0:01:14" "0:01:59" "0:02:01" "0:02:26" ...
## $ Entrances : chr "9,266" "2,706" "1,046" "635" ...
## $ Bounce Rate : chr "26.67%" "50.35%" "50.87%" "54.22%" ...
## $ % Exit : chr "24.04%" "40.81%" "33.50%" "36.10%" ...
## $ Page Value : chr "$0.00" "$0.00" "$0.00" "$0.00" ...
# Standardise the column names to snake_case.
summary %<>% clean_names()

# Counts arrive as text with thousands separators (e.g. "28,767"). Remove
# every comma: str_remove() drops only the first match, which would turn
# values of 1,000,000 or more into NA.
summary$pageviews <- str_remove_all(summary$pageviews, ",") %>% as.numeric()
summary$unique_pageviews <- str_remove_all(summary$unique_pageviews, ",") %>%
  as.numeric()
summary$entrances <- str_remove_all(summary$entrances, ",") %>% as.numeric()

# Percentages arrive as text such as "26.67%": drop the trailing "%" here;
# they are rescaled to proportions in the mutate() below.
summary$bounce_rate <- str_sub(summary$bounce_rate, end = -2) %>% as.numeric()
summary$percent_exit <- str_sub(summary$percent_exit, end = -2) %>% as.numeric()

# Currency arrives as text such as "$0.00": strip the dollar sign and any
# thousands separators before converting.
summary$page_value <- str_remove_all(summary$page_value, "[$,]") %>%
  as.numeric()

summary %<>%
  mutate(bounce_rate = bounce_rate / 100,
         percent_exit = percent_exit / 100,
         # Durations are "H:MM:SS". Everything before the last six
         # characters is the hours field; when it is absent ("MM:SS") the
         # empty string converts to NA, which is treated as zero hours.
         hrs = dplyr::coalesce(
           as.numeric(str_sub(avg_time_on_page, end = -7)), 0),
         mins = as.numeric(str_sub(avg_time_on_page, start = -5, end = -4)),
         secs = as.numeric(str_sub(avg_time_on_page, start = -2)),
         # Average time on page in seconds, including the hours component.
         avg_time_on_page = hrs * 3600 + mins * 60 + secs) %>%
  dplyr::select(-c("hrs", "mins", "secs"))

# Confirm the converted column types.
str(summary)
## 'data.frame': 11 obs. of 8 variables:
## $ page : chr "/msba/" "/msba/admissions/" "/msba/courses/" "/msba/faq/" ...
## $ pageviews : num 28767 13883 8810 5332 2383 ...
## $ unique_pageviews: num 19361 10256 6563 4358 1885 ...
## $ avg_time_on_page: num 74 119 121 146 114 106 59 124 19 230 ...
## $ entrances : num 9266 2706 1046 635 250 ...
## $ bounce_rate : num 0.267 0.504 0.509 0.542 0.549 ...
## $ percent_exit : num 0.24 0.408 0.335 0.361 0.288 ...
## $ page_value : num 0 0 0 0 0 0 0 0 0 0 ...
# Print the full cleaned table, including the trailing "Total" row.
summary
## page pageviews unique_pageviews
## 1 /msba/ 28767 19361
## 2 /msba/admissions/ 13883 10256
## 3 /msba/courses/ 8810 6563
## 4 /msba/faq/ 5332 4358
## 5 /msba/career-outlook/ 2383 1885
## 6 /msba/faculty/ 1291 737
## 7 /msba/experience/ 873 689
## 8 /msba/capstone/ 79 38
## 9 /msba-landing/ 4 4
## 10 /degree-programs/attachment/msba-550-395/ 3 3
## 11 Total 61434 43900
## avg_time_on_page entrances bounce_rate percent_exit page_value
## 1 74 9266 0.2667 0.2404 0
## 2 119 2706 0.5035 0.4081 0
## 3 121 1046 0.5087 0.3350 0
## 4 146 635 0.5422 0.3610 0
## 5 114 250 0.5490 0.2879 0
## 6 106 229 0.3906 0.2750 0
## 7 59 110 0.3950 0.1753 0
## 8 124 22 0.5652 0.3418 0
## 9 19 1 0.0000 0.0000 0
## 10 230 1 0.0000 0.3333 0
## 11 97 14269 0.3498 0.3041 0
# Keep only the first eight rows for the page-level analysis; rows 9-11
# (the two near-zero-traffic pages and the "Total" row) are left out.
df <- summary[seq_len(8), ]
df
## page pageviews unique_pageviews avg_time_on_page entrances
## 1 /msba/ 28767 19361 74 9266
## 2 /msba/admissions/ 13883 10256 119 2706
## 3 /msba/courses/ 8810 6563 121 1046
## 4 /msba/faq/ 5332 4358 146 635
## 5 /msba/career-outlook/ 2383 1885 114 250
## 6 /msba/faculty/ 1291 737 106 229
## 7 /msba/experience/ 873 689 59 110
## 8 /msba/capstone/ 79 38 124 22
## bounce_rate percent_exit page_value
## 1 0.2667 0.2404 0
## 2 0.5035 0.4081 0
## 3 0.5087 0.3350 0
## 4 0.5422 0.3610 0
## 5 0.5490 0.2879 0
## 6 0.3906 0.2750 0
## 7 0.3950 0.1753 0
## 8 0.5652 0.3418 0
The landing and degree-programs pages look like they may be defunct: both have very low pageviews, and the degree-programs page has a weirdly long “avg_time_on_page”.
# One bar chart per metric, with pages ordered from the highest to the
# lowest value and the page labels rotated so they stay readable.
ggplot(data = df,
       mapping = aes(x = reorder(page, -pageviews), y = pageviews)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

ggplot(data = df,
       mapping = aes(x = reorder(page, -unique_pageviews),
                     y = unique_pageviews)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

ggplot(data = df,
       mapping = aes(x = reorder(page, -avg_time_on_page),
                     y = avg_time_on_page)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

ggplot(data = df,
       mapping = aes(x = reorder(page, -entrances), y = entrances)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

ggplot(data = df,
       mapping = aes(x = reorder(page, -bounce_rate), y = bounce_rate)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

ggplot(data = df,
       mapping = aes(x = reorder(page, -percent_exit), y = percent_exit)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))
What is the landing page? Doesn’t seem to have a lot of pageviews, if it is truly a “landing” page. Are people coming to the website from some other source? What leads to landing page vs /msba/?
Bounce rate is the “% of visits that were the only one in the session”. If so, then a LOT of people are bouncing from Capstone and Career Outlook. Capstone has the fewest views but the highest bounce rate.
Lot of time spent reading FAQ.
Most people come in through main page.
# Total versus unique pageviews, one point per page.
ggplot(data = df, mapping = aes(x = pageviews, y = unique_pageviews)) +
  geom_point()

# Pairwise correlations of the numeric metrics; column 1 (page, text) and
# column 8 (page_value, all zero in this data) are excluded.
PerformanceAnalytics::chart.Correlation(df[, -c(1, 8)])
# Inspect the raw column types of the daily pageviews table before conversion.
str(views)
## 'data.frame': 290 obs. of 2 variables:
## $ Day Index: chr "8/1/2020" "8/2/2020" "8/3/2020" "8/4/2020" ...
## $ Pageviews: chr "0" "0" "0" "0" ...
# Standardise the column names to snake_case.
views %<>% clean_names()

# Counts arrive as text. Remove every thousands separator: str_remove()
# drops only the first comma, which would turn values of 1,000,000 or more
# into NA.
views$pageviews <- str_remove_all(views$pageviews, ",") %>% as.numeric()

# Dates arrive as month/day/year text, e.g. "8/1/2020".
views$day_index <- as.Date(views$day_index, format = "%m/%d/%Y")

# Confirm the converted column types.
str(views)
## 'data.frame': 290 obs. of 2 variables:
## $ day_index: Date, format: "2020-08-01" "2020-08-02" ...
## $ pageviews: num 0 0 0 0 0 0 0 0 0 0 ...
# Daily pageviews as a line chart. Row 290 is excluded throughout this
# section (presumably a trailing non-daily row such as a total -- confirm).
ggplot(views[-290, ], aes(day_index, pageviews)) + geom_line()

# The same series as a ts object: 365 observations per yearly cycle,
# starting at position 213 of 2020.
# NOTE(review): 1 Aug is day 214 of the leap year 2020 -- confirm whether
# the start position should be 214.
views_ts <- ts(views$pageviews[-290], frequency = 365, start = c(2020, 213))

# The time axis of this series is in fractional years (roughly 2020.6 to
# 2021.4), so rely on the default breaks rather than hand-set integer
# positions, which would fall outside the plotted range and leave the axis
# without ticks.
autoplot(views_ts) +
  labs(x = "Year", y = "Pageviews")

# The same chart with one labelled tick per month.
views[-290, ] %>%
  ggplot(aes(day_index, pageviews)) + geom_line() +
  scale_x_date(labels = date_format(format = "%b %Y"),
               breaks = date_breaks("1 month")) +
  ggtitle("Pageviews over time")
Something clearly changed in early September 2020. Was that initial page launch?
# Restrict the series to rows 39-289 (8 Sep 2020 onwards, assuming one row
# per consecutive day from 1 Aug 2020) and redraw the monthly-tick chart.
# ggplot(views[39:289,], aes(day_index, pageviews)) + geom_line()
views_ts_limit <- ts(
  views$pageviews[39:289],
  start = c(2020, 251),
  frequency = 365
)

ggplot(data = views[39:289, ],
       mapping = aes(x = day_index, y = pageviews)) +
  geom_line() +
  scale_x_date(breaks = date_breaks("1 month"),
               labels = date_format(format = "%b %Y")) +
  ggtitle("Pageviews over time")
Can see a slight curve from Sep to late Dec 2020, which could be due to the incoming class checking the website. Another curve (up and then down) starts in Jan and then decreases. Could this be related to application deadlines (early priority in Jan, priority in Mar, and final in June)?