1. Data Preparation

# Libraries

library(tidyverse)
library(statsr)
library(infer)
library(psych)

# Read the student performance CSV straight from the GitHub repository

data <- read.csv(
  file = "https://raw.githubusercontent.com/jnataky/DATA-607/master/A2_Various_dataset_transformation/students_performance.csv"
)

# Inspect the structure of the loaded data frame

data %>%
  glimpse()

# The raw data frame has many columns we will not use, so keep only the
# test preparation indicator and the three subject scores.

data_sub <- data %>%
  select(test.preparation.course, math.score, writing.score, reading.score)

# Build the two variables of interest:
#   test_prep   - test.preparation.course renamed; records whether the student
#                 took the test preparation course
#   tests_score - student performance, i.e. the student's average score in
#                 math, writing, and reading
# The columns are named inside transmute() rather than by position with
# names<-, so the labels cannot drift if the column order ever changes.

data_clean <- data_sub %>%
  transmute(
    test_prep = test.preparation.course,
    tests_score = (math.score + writing.score + reading.score) / 3
  )

# Count the missing values across every cell of data_clean

data_clean %>%
  is.na() %>%
  sum()


# Create a new variable test_prep_course, coded as "yes" if the student
# completed a preparation course and "no" otherwise, and build a new data
# frame that keeps this variable together with tests_score.
# test_prep is referenced as a bare column name (not data_clean$test_prep) so
# the recode always uses the rows flowing through the pipe.

data_final <- data_clean %>%
  transmute(
    test_prep_course = ifelse(test_prep == "completed", "yes", "no"),
    tests_score
  )

2. Research question

Is the average test score different between students who completed a test preparation course and those who did not?

3. Cases

Each case represents a student in the United States. There are 1000 observations in the given data set.

4. Data collection

The data was obtained from Kaggle, where it was submitted by a member for others to explore (exploratory data analysis) and build on in a web-based data science environment.

The data can be accessed directly from the repository on GitHub.

5. Type of study

This is an observational study.

6. Data Source

The data comes from Kaggle's public datasets and can be found online here: https://www.kaggle.com/spscientist/students-performance-in-exams

7. Response

The response variable is the student's mean test score (the average of the math, writing, and reading scores) and is numerical.

8. Explanatory

The explanatory variable is whether the student completed a test preparation course, and it is categorical.

9. Relevant summary statistics

# Split data_final into one data frame per test preparation group

test_prep_yes <- filter(data_final, test_prep_course == "yes")
test_prep_no <- filter(data_final, test_prep_course == "no")

# Draw the two groups' tests_score values as side-by-side boxplots

boxplot(
  list(test_prep_yes$tests_score, test_prep_no$tests_score),
  names = c("Test score with preparation", "Test score with no preparation")
)

# Descriptive statistics of tests_score for students who took the course

describe(test_prep_yes[["tests_score"]])
##    vars   n  mean    sd median trimmed  mad   min max range  skew kurtosis   se
## X1    1 358 72.67 13.04   73.5   73.03 12.6 34.33 100 65.67 -0.26    -0.26 0.69
# Descriptive statistics of tests_score for students who did not take the course

describe(test_prep_no[["tests_score"]])
##    vars   n  mean    sd median trimmed   mad min max range  skew kurtosis   se
## X1    1 642 65.04 14.19  65.33   65.33 14.33   9 100    91 -0.28     0.22 0.56
# Histogram for students who had test preparation course
# To visualize the distribution (shape, center, ...)
# An explicit 5-point binwidth replaces the default of 30 bins, which made
# ggplot2 emit a "Pick better value with `binwidth`" message.

ggplot(test_prep_yes, aes(x = tests_score)) +
  geom_histogram(binwidth = 5) +
  ggtitle("Distribution of students who had test preparation course")

# Histogram for students who had no test preparation course
# To visualize the distribution (shape, center, ...)
# Uses the same 5-point binwidth so the two histograms are directly comparable.

ggplot(test_prep_no, aes(x = tests_score)) +
  geom_histogram(binwidth = 5) +
  ggtitle("Distribution of students who had no test preparation course")

LS0tDQp0aXRsZTogREFUQSA2MDYgRGF0YSBQcm9qZWN0IFByb3Bvc2FsDQphdXRob3I6IEplcmVkIEF0YWt5DQpkYXRlOiAiMjAyMC0xMC0yNSINCm91dHB1dDogDQogIG9wZW5pbnRybzo6bGFiX3JlcG9ydDogZGVmYXVsdA0KICBodG1sX2RvY3VtZW50Og0KICAgIG51bWJlcl9zZWN0aW9uczogeWVzDQotLS0NCi0tLQ0KDQojIyMgMS4gRGF0YSBQcmVwYXJhdGlvbg0KDQoNCmBgYHtyIHNldHVwLCBlY2hvPVRSVUUsIHJlc3VsdHM9J2hpZGUnLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0KDQojIExpYnJhcmllcw0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoc3RhdHNyKQ0KbGlicmFyeShpbmZlcikNCmxpYnJhcnkocHN5Y2gpDQoNCiMgTG9hZCBkYXRhIGZyb20gR2l0aHViIHJlcG9zaXRvcnkNCg0KZGF0YSA8LSByZWFkLmNzdigiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2puYXRha3kvREFUQS02MDcvbWFzdGVyL0EyX1ZhcmlvdXNfZGF0YXNldF90cmFuc2Zvcm1hdGlvbi9zdHVkZW50c19wZXJmb3JtYW5jZS5jc3YiKQ0KDQoNCiMgVGFrZSBhIGxvb2sgYXQgaXRzIHN0cnVjdHVyZQ0KDQpnbGltcHNlKGRhdGEpDQoNCiMgU2luY2UgdGhlcmUgYXJlIG1hbnkgY29sdW1ucyBpbiBkYXRhIGZyYW1lIHRoYXQgd2UgYXJlIG5vdCBnb2luZyB0byB1c2UNCiMgV2Ugd2lsbCBzdWJzZXQgdGhpcyBkYXRhIGZyYW1lIHRvIG9uZSBpbmNsdWRpbmcgdGhlIHZhcmlhYmxlcyBvZiBpbnRlcmVzdA0KDQoNCmRhdGFfc3ViIDwtIGRhdGEgJT4lDQogIHNlbGVjdCh0ZXN0LnByZXBhcmF0aW9uLmNvdXJzZSwgbWF0aC5zY29yZSwgd3JpdGluZy5zY29yZSwgcmVhZGluZy5zY29yZSkNCg0KIyBXZSB3aWxsIGNyZWF0ZSBhIHZhcmlhYmxlIGNhbGxlZCB0ZXN0c19zY29yZQ0KIyB0ZXN0c19zY29yZSBvciBzdHVkZW50IHBlcmZvcm1hbmNlIGlzIHRoZSBzdHVkZW50IGF2ZXJhZ2Ugc2NvcmUgDQojIGluIG1hdGgsIHdyaXRpbmcsIGFuZCByZWFkaW5nLg0KIyBXZSB3aWxsIHJlbmFtZSB0aGUgdmFyaWFibGUgdGVzdC5wcmVwYXJhdGlvbi5jb3Vyc2UgYXMgdGVzdF9wcmVwDQojIFdoaWNoIGRlc2NyaWJlcyBpZiB0aGUgc3R1ZGVudCB0b29rIHRlc3QgcHJlcGFyYXRpb24gdGVzdCBvciBub3QNCiMgVGhvc2UgdHdvIHZhcmlhYmxlcyBhcmUgdGhlIHZhcmlhYmxlcyB3ZSBhcmUgaW50ZXJlc3RlZCBpbi4NCg0KZGF0YV9jbGVhbiA8LSBkYXRhX3N1YiAlPiUNCiAgdHJhbnNtdXRlKHRlc3QucHJlcGFyYXRpb24uY291cnNlLCB0ZXN0c19zY29yZSA9IChtYXRoLnNjb3JlICsgd3JpdGluZy5zY29yZSArIHJlYWRpbmcuc2NvcmUpIC8gMykNCg0KbmFtZXMoZGF0YV9jbGVhbikgPC0gYygidGVzdF9wcmVwIiwgInRlc3RzX3Njb3JlIikNCg0KIyBDaGVjayBmb3IgbWlzc2luZyB2YWx1ZXMNCg0Kc3VtKGlzLm5hKGRhdGFfY2xlYW4pKQ0KDQoNCiMgTGV0IGNyZWF0ZSBhIG5ldyB2YXJpYWJsZSB0ZXN0X3ByZXBfY291cnNl
LCB3aGljaCB3aWxsIGJlIGNvZGVkIGFzIGVpdGhlcg0KIyAieWVzIiBpZiB0aGV5IGhhZCBhIHByZXBhcmF0aW9uIGNvdXJzZSwgYW5kICJubyIgaWYgbm90DQojIEFuZCBjcmVhdGUgYSBuZXcgZHRhIGZyYW1lIHRoYXQgd2lsbCBrZWVwIHRoaXMgbmV3IHZhcmlhYmxlIGFuZCB0ZXN0X3ByZXBfc2NvcmUgdmFyaWFibGUNCg0KZGF0YV9maW5hbCA8LSBkYXRhX2NsZWFuICU+JQ0KICB0cmFuc211dGUodGVzdF9wcmVwX2NvdXJzZSA9IGlmZWxzZShkYXRhX2NsZWFuJHRlc3RfcHJlcCA9PSAiY29tcGxldGVkIiwgInllcyIsICJubyIpLCB0ZXN0c19zY29yZSkNCg0KYGBgDQoNCg0KIyMjIDIuIFJlc2VhcmNoIHF1ZXN0aW9uIA0KDQpJcyB0aGUgYXZlcmFnZSB0ZXN0cyBzY29yZSBkaWZmZXJlbnQgZnJvbSBzdHVkZW50cyB3aG8NCmhhdmUgdGVzdCBwcmVwYXJhdGlvbiBjb3Vyc2UgYW5kIHRob3NlIHdobyBkb24ndCA/DQoNCg0KIyMjIDMuIENhc2VzIA0KDQpFYWNoIGNhc2UgcmVwcmVzZW50cyBhIHN0dWRlbnQgaW4gdGhlIFVuaXRlZCBTdGF0ZXMuIFRoZXJlIGFyZSAxMDAwIG9ic2VydmF0aW9ucyBpbiB0aGUgZ2l2ZW4gZGF0YSBzZXQuDQoNCg0KIyMjIDQuIERhdGEgY29sbGVjdGlvbiANCg0KDQpEYXRhIGlzIGNvbGxlY3RlZCBieSBba2FnZ2xlXShodHRwczovL3d3dy5rYWdnbGUuY29tL3Nwc2NpZW50aXN0L3N0dWRlbnRzLXBlcmZvcm1hbmNlLWluLWV4YW1zKS4NCkRhdGEgaXMgc3VibWl0dGVkIGJ5IGEgbWVtYmVyIHRvIGV4cGxvcmUgKGV4cGxvcmF0b3J5IGRhdGEgYW5hbHlzaXMpIGFuZCBidWlsZCBpbiBhIA0Kd2ViLWJhc2VkIGRhdGEgc2NpZW5jZSBlbnZpcm9ubWVudC4NCg0KVGhlIGRhdGEgY2FuIGJlIGFjY2Vzc2VkIGRpcmVjdGx5IGZyb20gdGhlIHJlcG9zaXRvcnkgYXQgW0dpdGh1Yl0oaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2puYXRha3kvREFUQS02MDcvbWFzdGVyL0EyX1ZhcmlvdXNfZGF0YXNldF90cmFuc2Zvcm1hdGlvbi9zdHVkZW50c19wZXJmb3JtYW5jZS5jc3YpDQoNCg0KIyMjIDUuIFR5cGUgb2Ygc3R1ZHkgDQoNCg0KVGhpcyBpcyBhbiBvYnNlcnZhdGlvbmFsIHN0dWR5Lg0KDQoNCiMjIyA2LiBEYXRhIFNvdXJjZSANCg0KRGF0YSBpcyBmcm9tIGthZ2dsZSBwdWJsaWMgZGF0YXNldHMgYW5kIGNhbiBiZSBmb3VuZCBvbmxpbmUgaGVyZToNCmh0dHBzOi8vd3d3LmthZ2dsZS5jb20vc3BzY2llbnRpc3Qvc3R1ZGVudHMtcGVyZm9ybWFuY2UtaW4tZXhhbXMNCg0KDQoNCiMjIyA3LiBSZXNwb25zZSANCg0KVGhlIHJlc3BvbnNlIHZhcmlhYmxlIGlzIG1lYW4gdGVzdHMgc2NvcmUgYW5kIGlzIG51bWVyaWNhbC4NCg0KDQojIyMgOC4gRXhwbGFuYXRvcnkgDQoNClRoZSBleHBsYW5hdG9yeSB2YXJpYWJsZSBpcyB0ZXN0IHByZXBhcmF0aW9uIGNvdXJzZSBhbmQgaXMgY2F0ZWdvcmljYWwuDQoNCg0KIyMjIDkuIFJlbGV2YW50IHN1bW1hcnkgc3RhdGlzdGljcyANCg0KDQoNCg0KYGBge3J9
DQoNCiMgQ3JlYXRlIHN1YnNldHMgZm9yIHRlc3QgcHJlcGFyYXRpb24gY291cnNlIGFuZCB0ZXN0IHBlcmZvcm1hbmNlDQoNCnRlc3RfcHJlcF95ZXMgPC0gZGF0YV9maW5hbCAlPiUNCiAgZmlsdGVyKHRlc3RfcHJlcF9jb3Vyc2UgPT0gInllcyIpDQoNCnRlc3RfcHJlcF9ubyA8LSBkYXRhX2ZpbmFsICU+JQ0KICBmaWx0ZXIodGVzdF9wcmVwX2NvdXJzZSA9PSAibm8iKQ0KDQojIFNpZGUtYnktc2lkZSBib3hwbG90IHRvIHZpc3VhbGl6ZSB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gdGhlIHR3byB2YXJpYWJsZXMNCg0KYm94cGxvdCh0ZXN0X3ByZXBfeWVzJHRlc3RzX3Njb3JlLCB0ZXN0X3ByZXBfbm8kdGVzdHNfc2NvcmUsDQogICAgICAgIG5hbWVzID0gYygiVGVzdCBzY29yZSB3aXRoIHByZXBhcmF0aW9uIiwgIlRlc3Qgc2NvcmUgd2l0aCBubyBwcmVwYXJhdGlvbiIpKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCg0KIyBTdW1tYXJ5IGZvciBzdHVkZW50cyB3aG8gaGFkIHRlc3QgcHJlcGFyYXRpb24gY291cnNlDQoNCmRlc2NyaWJlKHRlc3RfcHJlcF95ZXMkdGVzdHNfc2NvcmUpDQoNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCg0KIyBTdW1tYXJ5IGZvciBzdHVkZW50cyB3aG8gaGFkIG5vIHRlc3QgcHJlcGFyYXRpb24gY291cnNlDQoNCmRlc2NyaWJlKHRlc3RfcHJlcF9ubyR0ZXN0c19zY29yZSkNCg0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KDQojIEhpc3RvZ3JhbSBmb3Igc3R1ZGVudHMgd2hvIGhhZCB0ZXN0IHByZXBhcmF0aW9uIGNvdXJzZQ0KIyBUbyB2aXN1YWxpemUgdGhlIGRpc3RyaWJ1dGlvbiAoc2hhcGUsIGNlbnRlciwuLi4pDQoNCmdncGxvdCh0ZXN0X3ByZXBfeWVzLCBhZXMoeCA9IHRlc3RzX3Njb3JlKSkgKyANCiAgZ2VvbV9oaXN0b2dyYW0oKSArDQogIGdndGl0bGUoIkRpc3RyaWJ1dGlvbiBvZiBzdHVkZW50cyB3aG8gaGFkIHRlc3QgcHJlcGFyYXRpb24gY291cnNlIikNCg0KYGBgDQoNCg0KDQoNCg0KYGBge3J9DQoNCiMgSGlzdG9ncmFtIGZvciBzdHVkZW50cyB3aG8gaGFkIG5vIHRlc3QgcHJlcGFyYXRpb24gY291cnNlDQojIFRvIHZpc3VhbGl6ZSB0aGUgZGlzdHJpYnV0aW9uIChzaGFwZSwgY2VudGVyLC4uLikNCg0KZ2dwbG90KHRlc3RfcHJlcF9ubywgYWVzKHggPSB0ZXN0c19zY29yZSkpICsgDQogIGdlb21faGlzdG9ncmFtKCkgKw0KICBnZ3RpdGxlKCJEaXN0cmlidXRpb24gb2Ygc3R1ZGVudHMgd2hvIGhhZCBubyB0ZXN0IHByZXBhcmF0aW9uIGNvdXJzZSIpDQoNCmBgYA0KDQo=