This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk from the introductory text: print per-column summary
# statistics (min, quartiles, median, mean, max) for the `cars` data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(rmarkdown)
library(readxl)
# Read the tutorial-score CSV into the tibble `tuteperf`.
# NOTE(review): this is an absolute, user-specific Windows path, so the
# document can only be knitted on a machine with the same directory layout.
# Consider a path relative to the project instead — TODO confirm where the
# data file is meant to live.
tuteperf <- read_csv("C:/Users/thyagu/rmit/applied analytics/Assign4/Assignment 4b-2.csv")
## Parsed with column specification:
## cols(
## Gender = col_double(),
## IQ = col_double(),
## Profession = col_double(),
## Advice = col_double(),
## School = col_double(),
## `Score after tutorial` = col_double(),
## `Score before tutorial` = col_double()
## )
# Preview the first five rows of the imported data.
head(tuteperf, n = 5)
## # A tibble: 5 x 7
## Gender IQ Profession Advice School `Score after tutori~ `Score before tuto~
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 91 3 4 1 42 50
## 2 2 117 3 2 1 38 13
## 3 1 105 3 1 1 43 27
## 4 1 110 3 4 1 37 44
## 5 2 101 3 2 1 35 35
# Confirm the after-tutorial score column is numeric.
class(tuteperf[["Score after tutorial"]])
## [1] "numeric"
# Give both score columns short, syntactic names in a single rename() call.
tuteperf <- rename(
  tuteperf,
  score_after = `Score after tutorial`,
  score_before = `Score before tutorial`
)
# Re-check the column types under the new names.
class(tuteperf[["score_after"]])
## [1] "numeric"
class(tuteperf[["score_before"]])
## [1] "numeric"
# Count missing values in each score column.
sum(is.na(tuteperf[["score_after"]]))
## [1] 0
sum(is.na(tuteperf[["score_before"]]))
## [1] 0
# Add the per-row difference column d = score_after - score_before.
tuteperf <- mutate(tuteperf, d = score_after - score_before)

# One-row table of descriptive statistics for the before-tutorial scores:
# five-number summary, mean, standard deviation, row count and NA count.
summary_before <- summarise(
  tuteperf,
  Min = min(score_before, na.rm = TRUE),
  Q1 = quantile(score_before, probs = 0.25, na.rm = TRUE),
  Median = median(score_before, na.rm = TRUE),
  Q3 = quantile(score_before, probs = 0.75, na.rm = TRUE),
  Max = max(score_before, na.rm = TRUE),
  Mean = mean(score_before, na.rm = TRUE),
  SD = sd(score_before, na.rm = TRUE),
  n = n(),
  Missing = sum(is.na(score_before))
)
summary_before
## # A tibble: 1 x 9
## Min Q1 Median Q3 Max Mean SD n Missing
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 13 27 37 44 55 35.8 10.5 1290 0
# One-row table of descriptive statistics for the after-tutorial scores:
# five-number summary, mean, standard deviation, row count and NA count.
summary_after <- summarise(
  tuteperf,
  Min = min(score_after, na.rm = TRUE),
  Q1 = quantile(score_after, probs = 0.25, na.rm = TRUE),
  Median = median(score_after, na.rm = TRUE),
  Q3 = quantile(score_after, probs = 0.75, na.rm = TRUE),
  Max = max(score_after, na.rm = TRUE),
  Mean = mean(score_after, na.rm = TRUE),
  SD = sd(score_after, na.rm = TRUE),
  n = n(),
  Missing = sum(is.na(score_after))
)
summary_after
## # A tibble: 1 x 9
## Min Q1 Median Q3 Max Mean SD n Missing
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 33 37 41 44 55 41.2 4.95 1290 0
# Visualisation
# Paired-lines plot: rbind() stacks the two score vectors into a two-row
# matrix (row 1 = before, row 2 = after) and matplot() draws each column as
# its own before-to-after line.
matplot(
  rbind(tuteperf$score_before, tuteperf$score_after),
  type = "b",
  pch = 19,
  col = 1,
  lty = 1,
  xlab = "Timing(Before or after)",
  ylab = "Score",
  xaxt = "n" # suppress the default numeric x axis; it is labelled below
)
axis(side = 1, at = 1:2, labels = c("Before", "After"))
#
# Side-by-side boxplots of the before- and after-tutorial scores.
# The group labels are supplied through `names` so that boxplot() draws them
# itself. Adding them afterwards with axis() prints "Before"/"After" on top
# of the default "1" and "2" tick labels that boxplot() has already drawn.
boxplot(tuteperf$score_before,
  tuteperf$score_after,
  names = c("Before", "After"),
  ylab = "Scores",
  xlab = "Time"
)
#
# Boxplot of the difference column d (score_after - score_before).
boxplot(tuteperf$d,
  ylab = "Score difference (after - before)"
)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Check normality of the difference scores with a normal Q-Q plot.
# The argument name is written out in full (`distribution`) instead of
# relying on partial matching of the abbreviation `dist`.
qqPlot(tuteperf$d, distribution = "norm")
## [1] 8 83
# One-row table of descriptive statistics for the difference column d:
# five-number summary, mean, standard deviation, row count and NA count.
summary_difference <- summarise(
  tuteperf,
  Min = min(d, na.rm = TRUE),
  Q1 = quantile(d, probs = 0.25, na.rm = TRUE),
  Median = median(d, na.rm = TRUE),
  Q3 = quantile(d, probs = 0.75, na.rm = TRUE),
  Max = max(d, na.rm = TRUE),
  Mean = mean(d, na.rm = TRUE),
  SD = sd(d, na.rm = TRUE),
  n = n(),
  Missing = sum(is.na(d))
)
summary_difference
## # A tibble: 1 x 9
## Min Q1 Median Q3 Max Mean SD n Missing
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 -14 0 0 14 31 5.35 10.0 1290 0
# Two-sided paired t-test of the after scores against the before scores,
# reported with a 95% confidence interval.
t.test(
  x = tuteperf$score_after,
  y = tuteperf$score_before,
  alternative = "two.sided",
  paired = TRUE,
  conf.level = 0.95
)
##
## Paired t-test
##
## data: tuteperf$score_after and tuteperf$score_before
## t = 19.144, df = 1289, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.802104 5.898671
## sample estimates:
## mean of the differences
## 5.350388
# One-sided paired t-test: alternative is that the mean of
# (score_after - score_before) is greater than zero, at the 95% level.
t.test(
  x = tuteperf$score_after,
  y = tuteperf$score_before,
  alternative = "greater",
  paired = TRUE,
  conf.level = 0.95
)
##
## Paired t-test
##
## data: tuteperf$score_after and tuteperf$score_before
## t = 19.144, df = 1289, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 4.890355 Inf
## sample estimates:
## mean of the differences
## 5.350388
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.