0.1 Library

library("readr")
library(dplyr)
library(lubridate)
# Read the course appendix table with base R's CSV reader, then show its
# first rows as a quick sanity check.
data <- utils::read.csv(file = "~/noobsQA/TEL/appendix.csv")
utils::head(data)

0.2 MOOCs are growing

How have MOOCs grown from 2012 to 2016? (Hint: use a line graph, bar graph, etc. Show the years, the number of participants, and the number of MOOCs in one visualization.)

# Parse launch dates (month/day/year text) into Date values so they can be
# ordered and placed on a continuous time axis.
data$Launch.Date <- as.Date(data$Launch.Date, format = "%m/%d/%Y")
library(ggplot2)
# Print large counts in full instead of scientific notation.
options(scipen = 999)

# A running total is only meaningful when accumulated in chronological
# order: geom_line() draws points sorted by x, so a cumsum() taken in raw
# row order would be attached to the wrong dates. Sort first, then sum.
growth <- data %>%
  arrange(Launch.Date) %>%
  mutate(
    cumulative_participants = cumsum(Participants..Course.Content.Accessed.)
  )

graph1 <- ggplot(
  data = growth,
  aes(x = Launch.Date, y = cumulative_participants)
) +
  geom_line(color = "red", size = 3) +
  theme_minimal() +
  ylab("Number of members") +
  xlab("Launch date") +
  ggtitle("MOOCs are growing from 2012 till 2016")

graph1

# Interactive version of the same plot.
library(plotly)
ggplotly(graph1)

Use text analysis to show the most frequent course titles, and do the same for instructors. (Hint 1: word cloud; hint 2: watch out for noisy data.)

0.3 text analysis

# Make sure course titles are plain character strings before counting them.
data[["Course.Title"]] <- as.character(data[["Course.Title"]])

# Count how often each title occurs, keep titles seen more than once, and
# list the most frequent first.
data_wc <- data %>%
  group_by(Course.Title) %>%
  summarise(count = n()) %>%
  filter(count > 1) %>%
  arrange(desc(count))

library(ggwordcloud)

# Word cloud in which each title is sized by its number of occurrences.
ggplot(data = data_wc, mapping = aes(label = Course.Title, size = count)) +
  geom_text_wordcloud() +
  theme_minimal()

library(stringr)

data$Instructors <- as.character(data$Instructors)

# Copy of the data at this point; not used in this section.
data1 <- data

# A course lists its instructors as one comma-separated string; expand it
# so that every instructor gets a row of their own.
data3 <- data %>%
  tidyr::separate_rows(Instructors, sep = ",")

# Clean noisy names:
#  * drop stray "$" characters and turn "@" into "a";
#  * remove the literal nickname "(Bob)". fixed() matters here: as a regex,
#    "(Bob)" is a capture group and would delete "Bob" from every name;
#  * remove any remaining parentheses;
#  * squish whitespace last, so gaps left by the removals above do not make
#    the same instructor look like several different ones.
data3$Instructors <- data3$Instructors %>%
  str_replace_all(fixed("$"), "") %>%
  str_replace_all(fixed("@"), "a") %>%
  str_replace_all(fixed("(Bob)"), "") %>%
  str_replace_all("[()]", "") %>%
  str_squish()

# Count courses per instructor, ignoring missing or empty names, and keep
# instructors that appear more than once (most frequent first).
data_wc2 <- data3 %>%
  filter(!is.na(Instructors), Instructors != "") %>%
  group_by(Instructors) %>%
  summarise(count = n()) %>%
  arrange(-count) %>%
  filter(count > 1)

library(ggwordcloud)

# Word cloud in which each instructor is sized by their number of courses.
graph6 <- ggplot(data_wc2, aes(label = Instructors, size = count)) +
  geom_text_wordcloud() +
  theme_minimal()

graph6

library(tidytext)
library(gutenbergr)
library(wordcloud2)
# Keep only the columns needed for the title text analysis.
df_text <- select(data, Course.Title, Institution)

# Split every title into one word per row (column `word`), then discard the
# rows whose word appears in the `stop_words` table.
df_text_title <- anti_join(
  unnest_tokens(df_text, word, Course.Title),
  stop_words
)

0.4 difference between MITx and Harvardx

What is the difference between MITx and Harvardx regarding the number of participants per year?

# Cumulative participants over time, one line per institution.
# Rows are sorted by launch date before the per-institution running total
# is taken: geom_line() draws points in date order, so a cumsum() computed
# in raw row order would pair totals with the wrong dates. The grouping is
# dropped again once the running total has been computed.
graph2 <- data %>%
  arrange(Launch.Date) %>%
  group_by(Institution) %>%
  mutate(students = cumsum(Participants..Course.Content.Accessed.)) %>%
  ungroup() %>%
  ggplot(aes(x = Launch.Date, y = students)) +
  geom_line(aes(color = Institution), size = 1) +
  theme_dark() +
  ylab("Number of members") +
  xlab("Launch date") +
  ggtitle("Difference between MITx and Harvardx")

# Interactive version of the comparison.
ggplotly(graph2)
#data %>% group_by(Institution) %>% mutate(students = cumsum( Participants..Course.Content.Accessed.))

0.5 median age

What is the median age of the certified students?

# Distribution of the per-course median age, restricted to courses whose
# certified percentage is above zero. A transparent red boxplot and a
# semi-transparent blue violin are both drawn at x = 1.
graph5 <- ggplot(data = filter(data, X..Certified > 0)) +
  geom_boxplot(
    mapping = aes(y = Median.Age, x = 1),
    alpha = 0,
    color = "red"
  ) +
  geom_violin(
    mapping = aes(y = Median.Age, x = 1),
    fill = "blue",
    alpha = 0.5
  ) +
  theme_classic()

# Interactive version of the plot.
ggplotly(graph5)

0.6 Flows

Challenge yourself and think of one more question of your own. Then answer it using learning analytics (you can use machine learning, process mining, or any kind of visualization).

# Side-by-side boxplots of two per-course counts: participants at x = 1 and
# certified students at x = 2.
ggplot(data = data) +
  geom_boxplot(
    mapping = aes(x = 1, y = Participants..Course.Content.Accessed.)
  ) +
  geom_boxplot(
    mapping = aes(x = 2, y = Certified)
  )

# Sankey diagram of student flows, built from hard-coded totals.
# `source` and `target` are numeric codes for the node labels; the values
# 0-5 used here line up with the six labels in order (0 = "Enrolled").
layout(
  plot_ly(
    type = "sankey",
    orientation = "h",
    node = list(
      label = c(
        "Enrolled", "Drop Out", "Audited",
        "Certified", "Watched videos", "Uncertified"
      ),
      color = c("blue", "red", "blue", "green", "blue", "orange"),
      pad = 15,
      thickness = 20,
      line = list(color = "black", width = 0.5)
    ),
    link = list(
      source = c(0, 2, 2, 0, 4, 4),
      target = c(1, 3, 5, 4, 2, 1),
      value = c(1537899, 244705, 494555, 2911958, 739260, 2172698)
    )
  ),
  title = "Flows of students",
  font = list(size = 16)
)
LS0tCnRpdGxlOiAibGVhcm5pbmcgYW5hbHlzaXMiCmxpbmtjb2xvcjogdmlvbGV0Cm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgY29kZV9kb3dubG9hZDogdHJ1ZQogICAgdGhlbWU6IGNvc21vCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIHRvYzogdHJ1ZQogICAgdG9jX2RlcHRoOiA0CiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGRmX3ByaW50OiBwYWdlZAogICAgZmlnX2NhcHRpb246IHRydWUKICBwZGZfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCB3YXJuaW5nID0gRkFMU0UsIGVycm9yID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSkKYGBgCgoKIyMgTGlicmFyeQoKYGBge3J9CmxpYnJhcnkoInJlYWRyIikKbGlicmFyeShkcGx5cikKbGlicmFyeShsdWJyaWRhdGUpCmRhdGEgPC0gcmVhZC5jc3YoIn4vbm9vYnNRQS9URUwvYXBwZW5kaXguY3N2IikKaGVhZChkYXRhKQpgYGAKCiMjIE1PT0NzIGFyZSBncm93aW5nCgpIb3cgdGhlIE1PT0NzIGFyZSBncm93aW5nIGZyb20gMjAxMiB0aWxsIDIwMTY/IChoaW50OiBVc2UgbGluZSBncmFwaCwgYmFyZ3JhcGguLmV0Yy4gU2hvdyB5ZWFycywgdGhlIG51bWJlciBvZiBwYXItdGljaXBhbnRzLCBhbmQgdGhlIG51bWJlciBvZiBNT09DcyBpbiAxIHZpc3VhbGl6YXRpb24gcGxvdCkuCgoKYGBge3J9CmRhdGEkTGF1bmNoLkRhdGUgPSBhcy5EYXRlKGRhdGEkTGF1bmNoLkRhdGUsIGZvcm1hdCA9ICIlbS8lZC8lWSIpCgpgYGAKCgpgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQpvcHRpb25zKHNjaXBlbj05OTkpCmdyYXBoMSA8LSBnZ3Bsb3QoZGF0YSA9IGRhdGEsIGFlcyh4ID0gTGF1bmNoLkRhdGUsIHkgPSBQYXJ0aWNpcGFudHMuLkNvdXJzZS5Db250ZW50LkFjY2Vzc2VkLikpKwogIGdlb21fbGluZShhZXMoeSA9IGN1bXN1bShQYXJ0aWNpcGFudHMuLkNvdXJzZS5Db250ZW50LkFjY2Vzc2VkLikpLCBjb2xvciA9ICJyZWQiLCBzaXplID0gMykrCiAgdGhlbWVfbWluaW1hbCgpKwogIHlsYWIoIk51bWJlciBvZiBtZW1iZXJzIikgKwogIHhsYWIoIkxhdW5jaCBkYXRlIikrCiAgZ2d0aXRsZSgiTU9PQ3MgYXJlIGdyb3dpbmcgZnJvbSAyMDEyIHRpbGwgMjAxNiIpCgpncmFwaDEKCgpsaWJyYXJ5KHBsb3RseSkKZ2dwbG90bHkoZ3JhcGgxKQoKYGBgCgoKVXNlIHRleHQgYW5hbHlzaXMgdG8gc2hvdyB0aGUgbW9zdCBmcmVxdWVudCBjb3Vyc2UgdGl0bGVzLCBhbmQgYW5vdGhlciBmb3IgaW5zdHJ1Y3RvcnMuIChoaW50MTogd29yZGNsb3VkOyBoaW50Mjp3YXRjaG91dCBmb3Igbm9pc3kgZGF0YSkuCgojIyB0ZXh0IGFuYWx5c2lzCgpgYGB7cn0KZGF0YSRDb3Vyc2UuVGl0bGUgPC0gYXMuY2hhcmFjdGVyKGRhdGEkQ291cnNlLlRpdGxlKQoKZGF0YV93YyA8LSBkYXRhICU+JSBncm91cF9ieShDb3Vyc2UuVGl0bGUpICU+JSBzdW1tYXJpc2UoY291bnQgPSBuKCkpICU+
JSBhcnJhbmdlKC1jb3VudCkgJT4lIGZpbHRlcihjb3VudCA+IDEpCgpsaWJyYXJ5KGdnd29yZGNsb3VkKQoKZ2dwbG90KGRhdGFfd2MsIGFlcyhsYWJlbCA9IENvdXJzZS5UaXRsZSwgc2l6ZSA9IGNvdW50KSkgKwogIGdlb21fdGV4dF93b3JkY2xvdWQoKSArCiAgdGhlbWVfbWluaW1hbCgpCgoKYGBgCgpgYGB7cn0KbGlicmFyeShzdHJpbmdyKQoKZGF0YSRJbnN0cnVjdG9ycyA8LSBhcy5jaGFyYWN0ZXIoZGF0YSRJbnN0cnVjdG9ycykKCiNkYXRhX3djMiA8LSBkYXRhICU+JSBncm91cF9ieShJbnN0cnVjdG9ycykgJT4lIHN1bW1hcmlzZShjb3VudCA9IG4oKSkgJT4lIGFycmFuZ2UoLWNvdW50KSAlPiUgZmlsdGVyKGNvdW50ID4gMSkKCmRhdGExPSBkYXRhCgpkYXRhMyA9IGRhdGEgJT4lIHRpZHlyOjpzZXBhcmF0ZV9yb3dzKEluc3RydWN0b3JzLCBzZXAgPSIsIikKI2RhdGEzID0gZGF0YTMgJT4lIHRpZHlyOjpnYXRoZXIoa2V5ID0gIkluc3RydWN0b3IiLCB2YWx1ZSA9ICJhIiwgImIiLCAiYyIsICJkIiwgImYiKQoKCmRhdGEzJEluc3RydWN0b3JzIDwtICB0cmltd3MoZGF0YTMkSW5zdHJ1Y3RvcnMpCgpkYXRhMyRJbnN0cnVjdG9ycyA8LSBzdHJfcmVwbGFjZV9hbGwoZGF0YTMkSW5zdHJ1Y3RvcnMsICJbXFwkLF0iLCAiIikKZGF0YTMkSW5zdHJ1Y3RvcnMgPC0gc3RyX3JlcGxhY2VfYWxsKGRhdGEzJEluc3RydWN0b3JzLCAiW1xcQCxdIiwgImEiKQpkYXRhMyRJbnN0cnVjdG9ycyA8LSBzdHJfcmVwbGFjZV9hbGwoZGF0YTMkSW5zdHJ1Y3RvcnMsICIoQm9iKSIsICIiKQpkYXRhMyRJbnN0cnVjdG9ycyA8LSBzdHJfcmVwbGFjZV9hbGwoZGF0YTMkSW5zdHJ1Y3RvcnMsICJbXFwoXSIsICIiKQpkYXRhMyRJbnN0cnVjdG9ycyA8LSBzdHJfcmVwbGFjZV9hbGwoZGF0YTMkSW5zdHJ1Y3RvcnMsICJbXFwpXSIsICIiKQoKZGF0YV93YzIgPSBkYXRhMyAlPiUgZ3JvdXBfYnkoSW5zdHJ1Y3RvcnMpICU+JSBzdW1tYXJpc2UoY291bnQgPSBuKCkpICU+JSBhcnJhbmdlKC1jb3VudCkgJT4lIGZpbHRlcihjb3VudCA+IDEpCgpsaWJyYXJ5KGdnd29yZGNsb3VkKQoKZ3JhcGg2ID0gZ2dwbG90KGRhdGFfd2MyLCBhZXMobGFiZWwgPSBJbnN0cnVjdG9ycywgc2l6ZSA9IGNvdW50KSkgKwogIGdlb21fdGV4dF93b3JkY2xvdWQoKSArCiAgdGhlbWVfbWluaW1hbCgpCgpncmFwaDYKCmBgYAoKCgpgYGB7cn0KbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeShndXRlbmJlcmdyKQpsaWJyYXJ5KHdvcmRjbG91ZDIpCmBgYAoKYGBge3J9CmRmX3RleHQgPC0gZGF0YSAlPiUgc2VsZWN0KENvdXJzZS5UaXRsZSwgSW5zdGl0dXRpb24pCgpkZl90ZXh0X3RpdGxlIDwtIGRmX3RleHQgJT4lIHVubmVzdF90b2tlbnMod29yZCwgQ291cnNlLlRpdGxlKSAlPiUgYW50aV9qb2luKHN0b3Bfd29yZHMpCmBgYAoKIyMgZGlmZmVyZW5jZSBiZXR3ZWVuIE1JVHggYW5kIEhhcnZhcmR4CiAKV2hhdCBpcyB0aGUgZGlmZmVyZW5jZSBiZXR3ZWVuIE1JVHggYW5kIEhhcnZh
cmR4IHJlZ2FyZGluZyB0aGUgbnVtYmVyIG9mIHBhcnRpY2lwYW50cyBwZXIgeWVhcj8KCgpgYGB7cn0KZGF0YSAlPiUgZ3JvdXBfYnkoSW5zdGl0dXRpb24pICU+JSBtdXRhdGUoc3R1ZGVudHMgPSBjdW1zdW0oIFBhcnRpY2lwYW50cy4uQ291cnNlLkNvbnRlbnQuQWNjZXNzZWQuKSkgJT4lIApnZ3Bsb3QoYWVzKHggPSBMYXVuY2guRGF0ZSwgeSA9IHN0dWRlbnRzKSkrCiAgZ2VvbV9saW5lKGFlcyh5ID0gc3R1ZGVudHMgLCBjb2xvciA9IEluc3RpdHV0aW9uKSwgc2l6ZSA9IDEpKwogIHRoZW1lX2RhcmsoKSsKICB5bGFiKCJOdW1iZXIgb2YgbWVtYmVycyIpICsKICB4bGFiKCJMYXVuY2ggZGF0ZSIpKwogIGdndGl0bGUoIkRpZmZlcmVuY2UgYmV0d2VlbiBNSVR4IGFuZCBIYXJ2YXJkeCIpIC0+IGdyYXBoMgoKZ2dwbG90bHkoZ3JhcGgyKQpgYGAKCmBgYHtyfQojZGF0YSAlPiUgZ3JvdXBfYnkoSW5zdGl0dXRpb24pICU+JSBtdXRhdGUoc3R1ZGVudHMgPSBjdW1zdW0oIFBhcnRpY2lwYW50cy4uQ291cnNlLkNvbnRlbnQuQWNjZXNzZWQuKSkKYGBgCgoKIyMgbWVkaWFuIGFnZQoKV2hhdCBpcyB0aGUgbWVkaWFuIGFnZSBvZiB0aGUgY2VydGlmaWVkIHN0dWRlbnRzPwoKCmBgYHtyfQpkYXRhICAlPiUgZmlsdGVyKFguLkNlcnRpZmllZCA+IDApICU+JSAKZ2dwbG90KCkrCiAgZ2VvbV9ib3hwbG90KGFlcyh5PSBNZWRpYW4uQWdlLCB4ID0gMSksIGFscGhhID0gMCwgY29sb3IgPSAicmVkIikrCiAgZ2VvbV92aW9saW4oYWVzKHk9IE1lZGlhbi5BZ2UsIHggPSAxKSwgZmlsbCA9ICJibHVlIiwgYWxwaGEgPSAwLjUpKwogIHRoZW1lX2NsYXNzaWMoKSAtPiBncmFwaDUKCmdncGxvdGx5KGdyYXBoNSkKYGBgCgojIyBGbG93cwoKVGFja2xlIHlvdXJzZWxmIGFuZCB0aGluayBvZiBvbmUgbW9yZSBxdWVzdGlvbiBmcm9tIHlvdXIgb3duLiBUaGVuLCBhbnN3ZXIgaXQgdXNpbmcgbGVhcm5pbmcgYW5hbHl0aWNzICh5b3UgY2FuIHVzZSBtYWNoaW5lIGxlYXJuaW5nLCBwcm9jZXNzIG1pbmluZywgb3IgYW55IGtpbmQgb2YgdmlzdWFsaXphdGlvbikuCgoKYGBge3J9CmRhdGEgJT4lIApnZ3Bsb3QoKSsKICBnZW9tX2JveHBsb3QoYWVzKHggPSAxLCB5ID0gUGFydGljaXBhbnRzLi5Db3Vyc2UuQ29udGVudC5BY2Nlc3NlZC4pKSsKICBnZW9tX2JveHBsb3QoYWVzKHggPSAyLCB5ID0gQ2VydGlmaWVkKSkKYGBgCgpgYGB7cn0KcGxvdF9seSgKICAgIHR5cGUgPSAic2Fua2V5IiwKICAgIG9yaWVudGF0aW9uID0gImgiLAoKICAgIG5vZGUgPSBsaXN0KAogICAgICBsYWJlbCA9IGMoIkVucm9sbGVkIiwgIkRyb3AgT3V0IiwgIkF1ZGl0ZWQiLCAiQ2VydGlmaWVkIiwgIldhdGNoZWQgdmlkZW9zIiwgIlVuY2VydGlmaWVkIiksCiAgICAgIGNvbG9yID0gYygiYmx1ZSIsICJyZWQiLCAiYmx1ZSIsICJncmVlbiIsICJibHVlIiwgIm9yYW5nZSIpLAogICAgICBwYWQgPSAxNSwKICAgICAgdGhpY2tuZXNzID0gMjAsCiAgICAgIGxpbmUgPSBsaXN0KAogICAg
ICAgIGNvbG9yID0gImJsYWNrIiwKICAgICAgICB3aWR0aCA9IDAuNQogICAgICApCiAgICApLAoKICAgIGxpbmsgPSBsaXN0KAogICAgICBzb3VyY2UgPSBjKDAsIDIsIDIsIDAsIDQsIDQpLAogICAgICB0YXJnZXQgPSBjKDEsIDMsIDUsIDQsIDIsIDEpLAogICAgICB2YWx1ZSA9ICBjKDE1Mzc4OTksIDI0NDcwNSwgNDk0NTU1LCAyOTExOTU4LCA3MzkyNjAsIDIxNzI2OTgpCiAgICApCiAgKSAlPiUgCiAgbGF5b3V0KAogICAgdGl0bGUgPSAiRmxvd3Mgb2Ygc3R1ZGVudHMiLAogICAgZm9udCA9IGxpc3QoCiAgICAgIHNpemUgPSAxNgogICAgKQopCgoKYGBgCgo=