1. Data Preparation
# Libraries
library(tidyverse)
library(statsr)
library(infer)
library(psych)
# Pull the student performance CSV directly from the GitHub repository
data <- read.csv(
  file = "https://raw.githubusercontent.com/jnataky/DATA-607/master/A2_Various_dataset_transformation/students_performance.csv"
)
# Print a compact overview of the columns (names, types, leading values)
data %>%
  glimpse()
# Only the test-preparation column and the three subject scores are used
# in this analysis, so every other column is dropped up front
data_sub <- select(
  data,
  test.preparation.course,
  math.score,
  writing.score,
  reading.score
)
# Build the two variables of interest in a single step:
#   test_prep   - the test.preparation.course column under a shorter name
#   tests_score - student performance, i.e. the mean of the math,
#                 writing, and reading scores
data_clean <- transmute(
  data_sub,
  test_prep = test.preparation.course,
  tests_score = (math.score + writing.score + reading.score) / 3
)
# Count the NA cells across all columns of data_clean
data_clean %>%
  is.na() %>%
  sum()
# Recode the preparation status into a new variable, test_prep_course:
# "yes" if test_prep is "completed", "no" otherwise.
# The resulting data frame keeps only test_prep_course and tests_score.
# test_prep is referenced as a column of the piped data (not through
# data_clean$test_prep) so the recode always uses the rows actually being
# transformed, even if this pipeline is later filtered, reordered, or grouped.
data_final <- data_clean %>%
  transmute(
    test_prep_course = ifelse(test_prep == "completed", "yes", "no"),
    tests_score
  )
2. Research question
Is the average test score different between students who completed a test preparation course and those who did not?
3. Cases
Each case represents a student in the United States. There are 1000 observations in the given data set.
4. Data collection
The data was obtained from Kaggle, where it was submitted by a member for others to explore (exploratory data analysis) and build on in a web-based data science environment.
The data can be accessed directly from the repository on GitHub.
5. Type of study
This is an observational study.
7. Response
The response variable is the student's mean test score (the average of the math, writing, and reading scores) and is numerical.
8. Explanatory
The explanatory variable is whether the student completed a test preparation course, and it is categorical.
9. Relevant summary statistics
# Split data_final into one data frame per preparation group
test_prep_yes <- filter(data_final, test_prep_course == "yes")
test_prep_no <- filter(data_final, test_prep_course == "no")
# Draw both groups' score distributions next to each other in one boxplot;
# the list names become the labels under each box
boxplot(
  list(
    "Test score with preparation" = test_prep_yes$tests_score,
    "Test score with no preparation" = test_prep_no$tests_score
  )
)

# Descriptive statistics of tests_score for the group with a preparation course
describe(test_prep_yes[["tests_score"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 358 72.67 13.04 73.5 73.03 12.6 34.33 100 65.67 -0.26 -0.26 0.69
# Descriptive statistics of tests_score for the group without a preparation course
describe(test_prep_no[["tests_score"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 642 65.04 14.19 65.33 65.33 14.33 9 100 91 -0.28 0.22 0.56
# Distribution of tests_score (shape, center, ...) for students
# who had a test preparation course
test_prep_yes %>%
  ggplot(mapping = aes(x = tests_score)) +
  geom_histogram() +
  ggtitle("Distribution of students who had test preparation course")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distribution of tests_score (shape, center, ...) for students
# who had no test preparation course
test_prep_no %>%
  ggplot(mapping = aes(x = tests_score)) +
  geom_histogram() +
  ggtitle("Distribution of students who had no test preparation course")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

LS0tDQp0aXRsZTogREFUQSA2MDYgRGF0YSBQcm9qZWN0IFByb3Bvc2FsDQphdXRob3I6IEplcmVkIEF0YWt5DQpkYXRlOiAiMjAyMC0xMC0yNSINCm91dHB1dDogDQogIG9wZW5pbnRybzo6bGFiX3JlcG9ydDogZGVmYXVsdA0KICBodG1sX2RvY3VtZW50Og0KICAgIG51bWJlcl9zZWN0aW9uczogeWVzDQotLS0NCi0tLQ0KDQojIyMgMS4gRGF0YSBQcmVwYXJhdGlvbg0KDQoNCmBgYHtyIHNldHVwLCBlY2hvPVRSVUUsIHJlc3VsdHM9J2hpZGUnLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0KDQojIExpYnJhcmllcw0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoc3RhdHNyKQ0KbGlicmFyeShpbmZlcikNCmxpYnJhcnkocHN5Y2gpDQoNCiMgTG9hZCBkYXRhIGZyb20gR2l0aHViIHJlcG9zaXRvcnkNCg0KZGF0YSA8LSByZWFkLmNzdigiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2puYXRha3kvREFUQS02MDcvbWFzdGVyL0EyX1ZhcmlvdXNfZGF0YXNldF90cmFuc2Zvcm1hdGlvbi9zdHVkZW50c19wZXJmb3JtYW5jZS5jc3YiKQ0KDQoNCiMgVGFrZSBhIGxvb2sgYXQgaXRzIHN0cnVjdHVyZQ0KDQpnbGltcHNlKGRhdGEpDQoNCiMgU2luY2UgdGhlcmUgYXJlIG1hbnkgY29sdW1ucyBpbiBkYXRhIGZyYW1lIHRoYXQgd2UgYXJlIG5vdCBnb2luZyB0byB1c2UNCiMgV2Ugd2lsbCBzdWJzZXQgdGhpcyBkYXRhIGZyYW1lIHRvIG9uZSBpbmNsdWRpbmcgdGhlIHZhcmlhYmxlcyBvZiBpbnRlcmVzdA0KDQoNCmRhdGFfc3ViIDwtIGRhdGEgJT4lDQogIHNlbGVjdCh0ZXN0LnByZXBhcmF0aW9uLmNvdXJzZSwgbWF0aC5zY29yZSwgd3JpdGluZy5zY29yZSwgcmVhZGluZy5zY29yZSkNCg0KIyBXZSB3aWxsIGNyZWF0ZSBhIHZhcmlhYmxlIGNhbGxlZCB0ZXN0c19zY29yZQ0KIyB0ZXN0c19zY29yZSBvciBzdHVkZW50IHBlcmZvcm1hbmNlIGlzIHRoZSBzdHVkZW50IGF2ZXJhZ2Ugc2NvcmUgDQojIGluIG1hdGgsIHdyaXRpbmcsIGFuZCByZWFkaW5nLg0KIyBXZSB3aWxsIHJlbmFtZSB0aGUgdmFyaWFibGUgdGVzdC5wcmVwYXJhdGlvbi5jb3Vyc2UgYXMgdGVzdF9wcmVwDQojIFdoaWNoIGRlc2NyaWJlcyBpZiB0aGUgc3R1ZGVudCB0b29rIHRlc3QgcHJlcGFyYXRpb24gdGVzdCBvciBub3QNCiMgVGhvc2UgdHdvIHZhcmlhYmxlcyBhcmUgdGhlIHZhcmlhYmxlcyB3ZSBhcmUgaW50ZXJlc3RlZCBpbi4NCg0KZGF0YV9jbGVhbiA8LSBkYXRhX3N1YiAlPiUNCiAgdHJhbnNtdXRlKHRlc3QucHJlcGFyYXRpb24uY291cnNlLCB0ZXN0c19zY29yZSA9IChtYXRoLnNjb3JlICsgd3JpdGluZy5zY29yZSArIHJlYWRpbmcuc2NvcmUpIC8gMykNCg0KbmFtZXMoZGF0YV9jbGVhbikgPC0gYygidGVzdF9wcmVwIiwgInRlc3RzX3Njb3JlIikNCg0KIyBDaGVjayBmb3IgbWlzc2luZyB2YWx1ZXMNCg0Kc3VtKGlzLm5hKGRhdGFfY2xlYW4pKQ0KDQoNCiMgTGV0IGNyZWF0ZSBhIG5ldyB2YXJpYWJsZSB0ZXN0X3ByZXBfY291cnNl
LCB3aGljaCB3aWxsIGJlIGNvZGVkIGFzIGVpdGhlcg0KIyAieWVzIiBpZiB0aGV5IGhhZCBhIHByZXBhcmF0aW9uIGNvdXJzZSwgYW5kICJubyIgaWYgbm90DQojIEFuZCBjcmVhdGUgYSBuZXcgZHRhIGZyYW1lIHRoYXQgd2lsbCBrZWVwIHRoaXMgbmV3IHZhcmlhYmxlIGFuZCB0ZXN0X3ByZXBfc2NvcmUgdmFyaWFibGUNCg0KZGF0YV9maW5hbCA8LSBkYXRhX2NsZWFuICU+JQ0KICB0cmFuc211dGUodGVzdF9wcmVwX2NvdXJzZSA9IGlmZWxzZShkYXRhX2NsZWFuJHRlc3RfcHJlcCA9PSAiY29tcGxldGVkIiwgInllcyIsICJubyIpLCB0ZXN0c19zY29yZSkNCg0KYGBgDQoNCg0KIyMjIDIuIFJlc2VhcmNoIHF1ZXN0aW9uIA0KDQpJcyB0aGUgYXZlcmFnZSB0ZXN0cyBzY29yZSBkaWZmZXJlbnQgZnJvbSBzdHVkZW50cyB3aG8NCmhhdmUgdGVzdCBwcmVwYXJhdGlvbiBjb3Vyc2UgYW5kIHRob3NlIHdobyBkb24ndCA/DQoNCg0KIyMjIDMuIENhc2VzIA0KDQpFYWNoIGNhc2UgcmVwcmVzZW50cyBhIHN0dWRlbnQgaW4gdGhlIFVuaXRlZCBTdGF0ZXMuIFRoZXJlIGFyZSAxMDAwIG9ic2VydmF0aW9ucyBpbiB0aGUgZ2l2ZW4gZGF0YSBzZXQuDQoNCg0KIyMjIDQuIERhdGEgY29sbGVjdGlvbiANCg0KDQpEYXRhIGlzIGNvbGxlY3RlZCBieSBba2FnZ2xlXShodHRwczovL3d3dy5rYWdnbGUuY29tL3Nwc2NpZW50aXN0L3N0dWRlbnRzLXBlcmZvcm1hbmNlLWluLWV4YW1zKS4NCkRhdGEgaXMgc3VibWl0dGVkIGJ5IGEgbWVtYmVyIHRvIGV4cGxvcmUgKGV4cGxvcmF0b3J5IGRhdGEgYW5hbHlzaXMpIGFuZCBidWlsZCBpbiBhIA0Kd2ViLWJhc2VkIGRhdGEgc2NpZW5jZSBlbnZpcm9ubWVudC4NCg0KVGhlIGRhdGEgY2FuIGJlIGFjY2Vzc2VkIGRpcmVjdGx5IGZyb20gdGhlIHJlcG9zaXRvcnkgYXQgW0dpdGh1Yl0oaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2puYXRha3kvREFUQS02MDcvbWFzdGVyL0EyX1ZhcmlvdXNfZGF0YXNldF90cmFuc2Zvcm1hdGlvbi9zdHVkZW50c19wZXJmb3JtYW5jZS5jc3YpDQoNCg0KIyMjIDUuIFR5cGUgb2Ygc3R1ZHkgDQoNCg0KVGhpcyBpcyBhbiBvYnNlcnZhdGlvbmFsIHN0dWR5Lg0KDQoNCiMjIyA2LiBEYXRhIFNvdXJjZSANCg0KRGF0YSBpcyBmcm9tIGthZ2dsZSBwdWJsaWMgZGF0YXNldHMgYW5kIGNhbiBiZSBmb3VuZCBvbmxpbmUgaGVyZToNCmh0dHBzOi8vd3d3LmthZ2dsZS5jb20vc3BzY2llbnRpc3Qvc3R1ZGVudHMtcGVyZm9ybWFuY2UtaW4tZXhhbXMNCg0KDQoNCiMjIyA3LiBSZXNwb25zZSANCg0KVGhlIHJlc3BvbnNlIHZhcmlhYmxlIGlzIG1lYW4gdGVzdHMgc2NvcmUgYW5kIGlzIG51bWVyaWNhbC4NCg0KDQojIyMgOC4gRXhwbGFuYXRvcnkgDQoNClRoZSBleHBsYW5hdG9yeSB2YXJpYWJsZSBpcyB0ZXN0IHByZXBhcmF0aW9uIGNvdXJzZSBhbmQgaXMgY2F0ZWdvcmljYWwuDQoNCg0KIyMjIDkuIFJlbGV2YW50IHN1bW1hcnkgc3RhdGlzdGljcyANCg0KDQoNCg0KYGBge3J9
DQoNCiMgQ3JlYXRlIHN1YnNldHMgZm9yIHRlc3QgcHJlcGFyYXRpb24gY291cnNlIGFuZCB0ZXN0IHBlcmZvcm1hbmNlDQoNCnRlc3RfcHJlcF95ZXMgPC0gZGF0YV9maW5hbCAlPiUNCiAgZmlsdGVyKHRlc3RfcHJlcF9jb3Vyc2UgPT0gInllcyIpDQoNCnRlc3RfcHJlcF9ubyA8LSBkYXRhX2ZpbmFsICU+JQ0KICBmaWx0ZXIodGVzdF9wcmVwX2NvdXJzZSA9PSAibm8iKQ0KDQojIFNpZGUtYnktc2lkZSBib3hwbG90IHRvIHZpc3VhbGl6ZSB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gdGhlIHR3byB2YXJpYWJsZXMNCg0KYm94cGxvdCh0ZXN0X3ByZXBfeWVzJHRlc3RzX3Njb3JlLCB0ZXN0X3ByZXBfbm8kdGVzdHNfc2NvcmUsDQogICAgICAgIG5hbWVzID0gYygiVGVzdCBzY29yZSB3aXRoIHByZXBhcmF0aW9uIiwgIlRlc3Qgc2NvcmUgd2l0aCBubyBwcmVwYXJhdGlvbiIpKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCg0KIyBTdW1tYXJ5IGZvciBzdHVkZW50cyB3aG8gaGFkIHRlc3QgcHJlcGFyYXRpb24gY291cnNlDQoNCmRlc2NyaWJlKHRlc3RfcHJlcF95ZXMkdGVzdHNfc2NvcmUpDQoNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCg0KIyBTdW1tYXJ5IGZvciBzdHVkZW50cyB3aG8gaGFkIG5vIHRlc3QgcHJlcGFyYXRpb24gY291cnNlDQoNCmRlc2NyaWJlKHRlc3RfcHJlcF9ubyR0ZXN0c19zY29yZSkNCg0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KDQojIEhpc3RvZ3JhbSBmb3Igc3R1ZGVudHMgd2hvIGhhZCB0ZXN0IHByZXBhcmF0aW9uIGNvdXJzZQ0KIyBUbyB2aXN1YWxpemUgdGhlIGRpc3RyaWJ1dGlvbiAoc2hhcGUsIGNlbnRlciwuLi4pDQoNCmdncGxvdCh0ZXN0X3ByZXBfeWVzLCBhZXMoeCA9IHRlc3RzX3Njb3JlKSkgKyANCiAgZ2VvbV9oaXN0b2dyYW0oKSArDQogIGdndGl0bGUoIkRpc3RyaWJ1dGlvbiBvZiBzdHVkZW50cyB3aG8gaGFkIHRlc3QgcHJlcGFyYXRpb24gY291cnNlIikNCg0KYGBgDQoNCg0KDQoNCg0KYGBge3J9DQoNCiMgSGlzdG9ncmFtIGZvciBzdHVkZW50cyB3aG8gaGFkIG5vIHRlc3QgcHJlcGFyYXRpb24gY291cnNlDQojIFRvIHZpc3VhbGl6ZSB0aGUgZGlzdHJpYnV0aW9uIChzaGFwZSwgY2VudGVyLC4uLikNCg0KZ2dwbG90KHRlc3RfcHJlcF9ubywgYWVzKHggPSB0ZXN0c19zY29yZSkpICsgDQogIGdlb21faGlzdG9ncmFtKCkgKw0KICBnZ3RpdGxlKCJEaXN0cmlidXRpb24gb2Ygc3R1ZGVudHMgd2hvIGhhZCBubyB0ZXN0IHByZXBhcmF0aW9uIGNvdXJzZSIpDQoNCmBgYA0KDQo=