SUMMARY PAGEVIEWS

Looking at MSBA page view summary, converting data types

str(summary)
## 'data.frame':    11 obs. of  8 variables:
##  $ Page             : chr  "/msba/" "/msba/admissions/" "/msba/courses/" "/msba/faq/" ...
##  $ Pageviews        : chr  "28,767" "13,883" "8,810" "5,332" ...
##  $ Unique Pageviews : chr  "19,361" "10,256" "6,563" "4,358" ...
##  $ Avg. Time on Page: chr  "0:01:14" "0:01:59" "0:02:01" "0:02:26" ...
##  $ Entrances        : chr  "9,266" "2,706" "1,046" "635" ...
##  $ Bounce Rate      : chr  "26.67%" "50.35%" "50.87%" "54.22%" ...
##  $ % Exit           : chr  "24.04%" "40.81%" "33.50%" "36.10%" ...
##  $ Page Value       : chr  "$0.00" "$0.00" "$0.00" "$0.00" ...
# Normalise the export's column headers to snake_case so they can be used as
# bare names below (e.g. "Avg. Time on Page" -> avg_time_on_page).
summary %<>% clean_names()

# Counts are exported as text with thousands separators ("28,767").
# str_remove_all() strips every comma; str_remove() only drops the first one,
# which would turn values of 1,000,000 or more into NA.
summary$pageviews <- str_remove_all(summary$pageviews, ",") %>% as.numeric()
summary$unique_pageviews <- str_remove_all(summary$unique_pageviews, ",") %>%
  as.numeric()
summary$entrances <- str_remove_all(summary$entrances, ",") %>% as.numeric()

# Percentages: drop the trailing "%" (rescaled to proportions further down).
summary$bounce_rate <- str_sub(summary$bounce_rate, end = -2) %>% as.numeric()
summary$percent_exit <- str_sub(summary$percent_exit, end = -2) %>% as.numeric()

# Currency: drop the leading "$" and any thousands separators.
summary$page_value <- str_sub(summary$page_value, start = 2) %>%
  str_remove_all(",") %>%
  as.numeric()

summary %<>%
  mutate(
    # Percent -> proportion.
    bounce_rate = bounce_rate / 100,
    percent_exit = percent_exit / 100,
    # avg_time_on_page is "h:mm:ss". Everything before the last six characters
    # is the hours field; it is empty (-> NA -> 0) if the value is only "mm:ss".
    hrs = dplyr::coalesce(
      as.numeric(str_sub(avg_time_on_page, end = -7)),
      0
    ),
    mins = as.numeric(str_sub(avg_time_on_page, start = -5, end = -4)),
    secs = as.numeric(str_sub(avg_time_on_page, start = -2)),
    # Replace the text duration with total seconds.
    avg_time_on_page = hrs * 3600 + mins * 60 + secs
  ) %>%
  dplyr::select(-c("hrs", "mins", "secs"))
str(summary)
## 'data.frame':    11 obs. of  8 variables:
##  $ page            : chr  "/msba/" "/msba/admissions/" "/msba/courses/" "/msba/faq/" ...
##  $ pageviews       : num  28767 13883 8810 5332 2383 ...
##  $ unique_pageviews: num  19361 10256 6563 4358 1885 ...
##  $ avg_time_on_page: num  74 119 121 146 114 106 59 124 19 230 ...
##  $ entrances       : num  9266 2706 1046 635 250 ...
##  $ bounce_rate     : num  0.267 0.504 0.509 0.542 0.549 ...
##  $ percent_exit    : num  0.24 0.408 0.335 0.361 0.288 ...
##  $ page_value      : num  0 0 0 0 0 0 0 0 0 0 ...
summary
##                                         page pageviews unique_pageviews
## 1                                     /msba/     28767            19361
## 2                          /msba/admissions/     13883            10256
## 3                             /msba/courses/      8810             6563
## 4                                 /msba/faq/      5332             4358
## 5                      /msba/career-outlook/      2383             1885
## 6                             /msba/faculty/      1291              737
## 7                          /msba/experience/       873              689
## 8                            /msba/capstone/        79               38
## 9                             /msba-landing/         4                4
## 10 /degree-programs/attachment/msba-550-395/         3                3
## 11                                     Total     61434            43900
##    avg_time_on_page entrances bounce_rate percent_exit page_value
## 1                74      9266      0.2667       0.2404          0
## 2               119      2706      0.5035       0.4081          0
## 3               121      1046      0.5087       0.3350          0
## 4               146       635      0.5422       0.3610          0
## 5               114       250      0.5490       0.2879          0
## 6               106       229      0.3906       0.2750          0
## 7                59       110      0.3950       0.1753          0
## 8               124        22      0.5652       0.3418          0
## 9                19         1      0.0000       0.0000          0
## 10              230         1      0.0000       0.3333          0
## 11               97     14269      0.3498       0.3041          0
# Keep only the pages under /msba/. Selecting by path rather than by row
# position (1:8) keeps the subset correct if the export gains, loses or
# reorders rows; it excludes /msba-landing/, the /degree-programs/ attachment
# page and the "Total" row.
df <- summary %>%
  dplyr::filter(str_detect(page, "^/msba/"))
df
##                    page pageviews unique_pageviews avg_time_on_page entrances
## 1                /msba/     28767            19361               74      9266
## 2     /msba/admissions/     13883            10256              119      2706
## 3        /msba/courses/      8810             6563              121      1046
## 4            /msba/faq/      5332             4358              146       635
## 5 /msba/career-outlook/      2383             1885              114       250
## 6        /msba/faculty/      1291              737              106       229
## 7     /msba/experience/       873              689               59       110
## 8       /msba/capstone/        79               38              124        22
##   bounce_rate percent_exit page_value
## 1      0.2667       0.2404          0
## 2      0.5035       0.4081          0
## 3      0.5087       0.3350          0
## 4      0.5422       0.3610          0
## 5      0.5490       0.2879          0
## 6      0.3906       0.2750          0
## 7      0.3950       0.1753          0
## 8      0.5652       0.3418          0

The landing and degree-programs pages seem to be defunct? Both have very low pageviews, and the degree-programs page has a weirdly long “avg_time_on_page”.

Viz

# One bar chart per metric, with pages ordered from the highest to the lowest
# value of that metric. Page paths are long, so the x-axis labels are rotated
# to vertical; the rotation is defined once and reused by every chart.
vertical_x_labels <- theme(axis.text.x = element_text(angle = 90))

# Total pageviews
ggplot(df, aes(x = reorder(page, -pageviews), y = pageviews)) +
  geom_col() +
  vertical_x_labels

# Unique pageviews
ggplot(df, aes(x = reorder(page, -unique_pageviews), y = unique_pageviews)) +
  geom_col() +
  vertical_x_labels

# Average time on page (seconds)
ggplot(df, aes(x = reorder(page, -avg_time_on_page), y = avg_time_on_page)) +
  geom_col() +
  vertical_x_labels

# Entrances
ggplot(df, aes(x = reorder(page, -entrances), y = entrances)) +
  geom_col() +
  vertical_x_labels

# Bounce rate (proportion)
ggplot(df, aes(x = reorder(page, -bounce_rate), y = bounce_rate)) +
  geom_col() +
  vertical_x_labels

# Exit rate (proportion)
ggplot(df, aes(x = reorder(page, -percent_exit), y = percent_exit)) +
  geom_col() +
  vertical_x_labels

What is the landing page? Doesn’t seem to have a lot of pageviews, if it is truly a “landing” page. Are people coming to the website from some other source? What leads to landing page vs /msba/?

Bounce rate - “% of visits that were the only one in the session” (i.e. sessions in which the visitor viewed only that page). If so, then a LOT of people are bouncing from Capstone & career outlook. Capstone has the fewest views but the highest bounce rate.

Lot of time spent reading FAQ.
Most people come in through main page.

# Pageviews against unique pageviews, one point per page.
ggplot(df, aes(x = pageviews, y = unique_pageviews)) +
  geom_point()

# Pairwise correlations between the numeric metrics. `page` is text and
# `page_value` is 0 for every page shown above, so both are left out. They are
# dropped by name rather than by column position so the call keeps working if
# the column order changes.
numeric_metrics <- df[, setdiff(names(df), c("page", "page_value")), drop = FALSE]
PerformanceAnalytics::chart.Correlation(numeric_metrics)

VIEWS OVER TIME

str(views)
## 'data.frame':    290 obs. of  2 variables:
##  $ Day Index: chr  "8/1/2020" "8/2/2020" "8/3/2020" "8/4/2020" ...
##  $ Pageviews: chr  "0" "0" "0" "0" ...
# Normalise column headers to snake_case ("Day Index" -> day_index).
views %<>% clean_names()

# Pageviews are exported as text. str_remove_all() strips every thousands
# separator; str_remove() would only drop the first comma and turn values of
# 1,000,000 or more into NA.
views$pageviews <- str_remove_all(views$pageviews, ",") %>% as.numeric()
# Dates are exported as month/day/year without zero padding, e.g. "8/1/2020".
views$day_index <- as.Date(views$day_index, format = "%m/%d/%Y")
str(views)
## 'data.frame':    290 obs. of  2 variables:
##  $ day_index: Date, format: "2020-08-01" "2020-08-02" ...
##  $ pageviews: num  0 0 0 0 0 0 0 0 0 0 ...

Viz

# Row 290 is excluded from every plot and series in this section.
# NOTE(review): presumably the export's trailing totals row - confirm.
views_daily <- views[-290, ]

# Quick look at the raw daily series.
ggplot(views_daily, aes(day_index, pageviews)) + geom_line()

# Daily time series. The start position is derived from the first date rather
# than hard-coded: 2020 is a leap year, so 1 Aug 2020 is day 214, not 213.
first_day <- views_daily$day_index[1]
views_ts <- ts(
  views_daily$pageviews,
  frequency = 365,
  start = c(
    as.numeric(format(first_day, "%Y")),
    as.numeric(format(first_day, "%j"))
  )
)

# Use autoplot's default time axis. The previous manual breaks (1, 25, ..., 337)
# and labels (1992, ..., 2020) do not correspond to this series, whose time
# index only spans 2020-2021, so no tick fell inside the plotted range.
autoplot(views_ts)

# Same series with a monthly date axis and a title.
views_daily %>%
  ggplot(aes(day_index, pageviews)) +
  geom_line() +
  scale_x_date(
    labels = date_format(format = "%b %Y"),
    breaks = date_breaks("1 month")
  ) +
  ggtitle("Pageviews over time")

Something clearly changed in early September 2020. Was that initial page launch?

Filtering from 9/8/20

# ggplot(views[39:289,], aes(day_index, pageviews)) + geom_line()

# Rows 39:289 start at 2020-09-08 (row 1 is 2020-08-01) and stop before
# row 290, which is also excluded from the full-period plots.
views_limit <- views[39:289, ]

# Daily time series for the filtered period. The start position is derived
# from the first retained date rather than hard-coded: 2020 is a leap year, so
# 8 Sep 2020 is day 252, not 251.
limit_start <- views_limit$day_index[1]
views_ts_limit <- ts(
  views_limit$pageviews,
  frequency = 365,
  start = c(
    as.numeric(format(limit_start, "%Y")),
    as.numeric(format(limit_start, "%j"))
  )
)

# Filtered series with a monthly date axis and a title.
views_limit %>%
  ggplot(aes(day_index, pageviews)) +
  geom_line() +
  scale_x_date(
    labels = date_format(format = "%b %Y"),
    breaks = date_breaks("1 month")
  ) +
  ggtitle("Pageviews over time")

There is a slight curve from Sep to late Dec 2020, which could be due to the incoming class checking the website. Another curve (up and then down) starts in Jan and tapers off afterwards. Could this be related to application deadlines (early priority in Jan, priority in Mar, and final in June)?