This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Read the MOOCs CSV into a data frame, then preview its first rows and its
# column types.
# NOTE(review): the path is absolute and specific to one user's machine.
filepath <- "/Users/jkwan/Documents/INST314/MoocsFinal.csv"
moocs_data <- read.csv(file = filepath)
head(x = moocs_data)
str(object = moocs_data)
'data.frame':   5300 obs. of  18 variables:
 $ course_id        : chr  "HarvardX/CS50x/2012" "HarvardX/CS50x/2012" "MITx/6.00x/2012_Fall" "HarvardX/CS50x/2012" ...
 $ userid_DI        : chr  "MHxPC130428093" "MHxPC130299882" "MHxPC130417083" "MHxPC130120970" ...
 $ registered       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ viewed           : int  1 1 0 1 1 1 0 0 1 1 ...
 $ explored         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ certified        : int  0 0 0 0 0 0 0 0 0 0 ...
 $ final_cc_cname_DI: chr  "United States" "United States" "Brazil" "United States" ...
 $ LoE_DI           : chr  "Master's" "Secondary" "Secondary" "Master's" ...
 $ YoB              : int  1989 1956 1992 1979 1986 1973 1989 1995 1991 1993 ...
 $ gender           : chr  "f" "m" "m" "m" ...
 $ grade            : num  0 0 0 0 0 0 0 NA 0 0 ...
 $ start_time_DI    : chr  "2013-03-04" "2012-10-15" "2012-08-24" "2013-04-20" ...
 $ last_event_DI    : chr  "" "" "2012-08-24" "2013-04-23" ...
 $ nevents          : int  0 0 2 14 0 13 0 1 0 16 ...
 $ ndays_act        : int  0 0 1 2 0 1 0 1 0 1 ...
 $ nplay_video      : int  0 0 0 0 0 0 0 0 0 0 ...
 $ nchapters        : int  1 2 0 1 1 1 0 0 2 2 ...
 $ nforum_posts     : int  0 0 0 0 0 0 0 0 0 0 ...
# Question 2: summary statistics (min, quartiles, median, mean, max) for the
# 0/1 outcome columns and the activity count columns, one column per call.
summary(moocs_data$viewed)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  1.0000  0.6149  1.0000  1.0000 
summary(moocs_data$explored)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.05943 0.00000 1.00000 
summary(moocs_data$certified)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.02585 0.00000 1.00000 
summary(moocs_data$nforum_posts)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.01132 0.00000 4.00000 
summary(moocs_data$nevents)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    0.0     0.0     3.0   266.7    61.0 17828.0 
summary(moocs_data$ndays_act)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   0.000   1.000   4.048   3.000 162.000 
summary(moocs_data$nplay_video)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    0.00    0.00   29.93    1.00 6209.00 
# Question 3: cross-tabulate education level (rows) against certification
# status (columns: 0 = not certified, 1 = certified).
# The column is referenced by its exact name, LoE_DI. The shorter `LoE` only
# worked through `$` partial matching, which yields NULL as soon as a second
# column whose name starts with "LoE" exists.
table(moocs_data$LoE_DI, moocs_data$certified)
                     
                         0    1
  Bachelor's          2021   53
  Doctorate            122    3
  Less than Secondary  140    6
  Master's            1189   34
  Secondary           1691   41
# Row-wise proportions (margin = 1): within each education level, the share
# of rows that are not certified (0) versus certified (1).
# Uses the exact column name LoE_DI instead of the partially matched `LoE`.
prop.table(table(moocs_data$LoE_DI, moocs_data$certified), margin = 1)
                     
                               0          1
  Bachelor's          0.97444552 0.02555448
  Doctorate           0.97600000 0.02400000
  Less than Secondary 0.95890411 0.04109589
  Master's            0.97219951 0.02780049
  Secondary           0.97632794 0.02367206
# Question 3 visualization: one bar per education level, stacked by
# certification status and scaled to a total height of 1 (position = "fill").
library(ggplot2)
ggplot(data = moocs_data) +
  geom_bar(
    mapping = aes(x = LoE_DI, fill = as.factor(certified)),
    position = "fill"
  ) +
  labs(
    title = "Certification Rate by Education Level",
    x = "Level of Education",
    y = "Proportion",
    fill = "Certified (1=Yes, 0=No)"
  ) +
  theme_minimal()

# Question 4: distribution of forum post counts across all rows; the
# per-certification-status breakdown follows.
summary(moocs_data$nforum_posts)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.01132 0.00000 4.00000 
tapply(moocs_data$nforum_posts, moocs_data$certified, summary)
$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.01026 0.00000 4.00000 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.05109 0.00000 3.00000 
# Question 4 visualization: certification share among rows with and without
# forum posts.
library(ggplot2)

# Label each row "Posted" when it has at least one forum post, otherwise
# "Did Not Post".
moocs_data[["forum_activity"]] <- ifelse(
  moocs_data[["nforum_posts"]] > 0,
  "Posted",
  "Did Not Post"
)

# One bar per forum-activity group, stacked by certification status and
# scaled to a total height of 1.
ggplot(data = moocs_data) +
  geom_bar(
    mapping = aes(x = forum_activity, fill = as.factor(certified)),
    position = "fill"
  ) +
  labs(
    title = "Forum Participation vs. Certification",
    x = "Forum Activity",
    y = "Proportion",
    fill = "Certified (1=Yes, 0=No)"
  ) +
  theme_minimal()

# Question 5: keep only the rows whose education level is
# "Less than Secondary" or "Secondary", then summarize forum post counts
# within that subset.
subset_data <- moocs_data[
  moocs_data$LoE_DI %in% c("Less than Secondary", "Secondary"),
]
summary(subset_data$nforum_posts)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.01384 0.00000 3.00000 
# Certification counts within the subset (0 = not certified, 1 = certified)
table(subset_data$certified) 

   0    1 
1831   47 
prop.table(table(subset_data$certified))

         0          1 
0.97497338 0.02502662 
# Forum Posts vs. Certification Breakdown
tapply(subset_data$nforum_posts, subset_data$certified, summary)
$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.01256 0.00000 3.00000 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.00000 0.00000 0.06383 0.00000 3.00000 
# Question 5 visualization: certification share among subset rows with and
# without forum posts.
library(ggplot2)

# Label each subset row "Posted" when it has at least one forum post,
# otherwise "Did Not Post".
subset_data[["forum_activity"]] <- ifelse(
  subset_data[["nforum_posts"]] > 0,
  "Posted",
  "Did Not Post"
)

# One bar per forum-activity group, stacked by certification status and
# scaled to a total height of 1.
ggplot(data = subset_data) +
  geom_bar(
    mapping = aes(x = forum_activity, fill = as.factor(certified)),
    position = "fill"
  ) +
  labs(
    title = "Forum Participation vs. Certification (Non-Bachelor's)",
    x = "Forum Activity",
    y = "Proportion",
    fill = "Certified (1=Yes, 0=No)"
  ) +
  theme_minimal()

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiTGFiIDIiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4KCmBgYHtyfQpmaWxlcGF0aCA8LSAiL1VzZXJzL2prd2FuL0RvY3VtZW50cy9JTlNUMzE0L01vb2NzRmluYWwuY3N2Igptb29jc19kYXRhIDwtIHJlYWQuY3N2KGZpbGVwYXRoKSAKaGVhZChtb29jc19kYXRhKQpzdHIobW9vY3NfZGF0YSkKYGBgCgpgYGB7cn0KIyBRdWVzdGlvbiAyOiAKc3VtbWFyeShtb29jc19kYXRhJHZpZXdlZCkKc3VtbWFyeShtb29jc19kYXRhJGV4cGxvcmVkKQpzdW1tYXJ5KG1vb2NzX2RhdGEkY2VydGlmaWVkKQpzdW1tYXJ5KG1vb2NzX2RhdGEkbmZvcnVtX3Bvc3RzKQpzdW1tYXJ5KG1vb2NzX2RhdGEkbmV2ZW50cykKc3VtbWFyeShtb29jc19kYXRhJG5kYXlzX2FjdCkKc3VtbWFyeShtb29jc19kYXRhJG5wbGF5X3ZpZGVvKQpgYGAKCmBgYHtyfQojIFF1ZXN0aW9uIDM6IAp0YWJsZShtb29jc19kYXRhJExvRSwgbW9vY3NfZGF0YSRjZXJ0aWZpZWQpCnByb3AudGFibGUodGFibGUobW9vY3NfZGF0YSRMb0UsIG1vb2NzX2RhdGEkY2VydGlmaWVkKSwgbWFyZ2luID0gMSkKYGBgCgpgYGB7cn0KIyBRdWVzdGlvbiAzIFZpc3VhbGl6YXRpb24gKEJhciBDaGFydCkgCmxpYnJhcnkoZ2dwbG90MikKZ2dwbG90KG1vb2NzX2RhdGEsIGFlcyh4ID0gTG9FX0RJLCBmaWxsID0gYXMuZmFjdG9yKGNlcnRpZmllZCkpKSArCiAgZ2VvbV9iYXIocG9zaXRpb24gPSAiZmlsbCIpICsKICBsYWJzKHRpdGxlID0gIkNlcnRpZmljYXRpb24gUmF0ZSBieSBFZHVjYXRpb24gTGV2ZWwiLAogICAgICAgeCA9ICJMZXZlbCBvZiBFZHVjYXRpb24iLAogICAgICAgeSA9ICJQcm9wb3J0aW9uIiwKICAgICAgIGZpbGwgPSAiQ2VydGlmaWVkICgxPVllcywgMD1ObykiKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYApgYGB7cn0KIyBRdWVzdGlvbiA0CnN1bW1hcnkobW9vY3NfZGF0YSRuZm9ydW1fcG9zdHMpCnRhcHBseShtb29jc19kYXRhJG5mb3J1bV9wb3N0cywgbW9vY3NfZGF0YSRjZXJ0aWZpZWQsIHN1bW1hcnkpCmBgYApgYGB7cn0KI1F1ZXN0aW9uIDQgVmlzdWFsaXphdGlvbiAoQmFyIGdyYXBoKQpsaWJyYXJ5KGdncGxvdDIpCgojIENyZWF0ZSBhIGNhdGVnb3JpY2FsIHZhcmlhYmxlIGZvciBmb3J1bSBwb3N0cwptb29jc19kYXRhJGZvcnVtX2FjdGl2aXR5IDwtIGlmZWxzZShtb29jc19kYXRhJG5mb3J1bV9wb3N0cyA+IDAsICJQb3N0ZWQiLCAiRGlkIE5v
dCBQb3N0IikKCiMgQ3JlYXRlIGJhciBjaGFydApnZ3Bsb3QobW9vY3NfZGF0YSwgYWVzKHggPSBmb3J1bV9hY3Rpdml0eSwgZmlsbCA9IGFzLmZhY3RvcihjZXJ0aWZpZWQpKSkgKwogIGdlb21fYmFyKHBvc2l0aW9uID0gImZpbGwiKSArCiAgbGFicyh0aXRsZSA9ICJGb3J1bSBQYXJ0aWNpcGF0aW9uIHZzLiBDZXJ0aWZpY2F0aW9uIiwKICAgICAgIHggPSAiRm9ydW0gQWN0aXZpdHkiLAogICAgICAgeSA9ICJQcm9wb3J0aW9uIiwKICAgICAgIGZpbGwgPSAiQ2VydGlmaWVkICgxPVllcywgMD1ObykiKSArCiAgdGhlbWVfbWluaW1hbCgpCgpgYGAKYGBge3J9CiMgUXVldGlvbiA1IApzdWJzZXRfZGF0YSA8LSBzdWJzZXQobW9vY3NfZGF0YSwgTG9FX0RJICVpbiUgYygiTGVzcyB0aGFuIFNlY29uZGFyeSIsICJTZWNvbmRhcnkiKSkKc3VtbWFyeShzdWJzZXRfZGF0YSRuZm9ydW1fcG9zdHMpCiNDZXJ0aWZpY2F0aW9uIFJhdGVzCnRhYmxlKHN1YnNldF9kYXRhJGNlcnRpZmllZCkgCnByb3AudGFibGUodGFibGUoc3Vic2V0X2RhdGEkY2VydGlmaWVkKSkKIyBGb3J1bSBQb3N0cyB2cy4gQ2VydGlmaWNhdGlvbiBCcmVha2Rvd24KdGFwcGx5KHN1YnNldF9kYXRhJG5mb3J1bV9wb3N0cywgc3Vic2V0X2RhdGEkY2VydGlmaWVkLCBzdW1tYXJ5KQpgYGAKYGBge3J9CiMgUXVlc3Rpb24gNQpsaWJyYXJ5KGdncGxvdDIpCgojIENyZWF0ZSBhIGNhdGVnb3JpY2FsIHZhcmlhYmxlIGZvciBmb3J1bSBwb3N0cwpzdWJzZXRfZGF0YSRmb3J1bV9hY3Rpdml0eSA8LSBpZmVsc2Uoc3Vic2V0X2RhdGEkbmZvcnVtX3Bvc3RzID4gMCwgIlBvc3RlZCIsICJEaWQgTm90IFBvc3QiKQoKIyBDcmVhdGUgYmFyIGNoYXJ0CmdncGxvdChzdWJzZXRfZGF0YSwgYWVzKHggPSBmb3J1bV9hY3Rpdml0eSwgZmlsbCA9IGFzLmZhY3RvcihjZXJ0aWZpZWQpKSkgKwogIGdlb21fYmFyKHBvc2l0aW9uID0gImZpbGwiKSArCiAgbGFicyh0aXRsZSA9ICJGb3J1bSBQYXJ0aWNpcGF0aW9uIHZzLiBDZXJ0aWZpY2F0aW9uIChOb24tQmFjaGVsb3IncykiLAogICAgICAgeCA9ICJGb3J1bSBBY3Rpdml0eSIsCiAgICAgICB5ID0gIlByb3BvcnRpb24iLAogICAgICAgZmlsbCA9ICJDZXJ0aWZpZWQgKDE9WWVzLCAwPU5vKSIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ21kK09wdGlvbitJKi4KCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ21kK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBl
ZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCg==