KNIT4B

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Template example chunk: print summary statistics for each column of `cars`.
summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2) 
library(rmarkdown)
library(readxl)
# Read the assignment data into a tibble; readr guesses the column types and
# reports the specification it used (echoed below).
# NOTE(review): this is an absolute, machine-specific path -- the script will
# only run where this exact file location exists.
tuteperf <- read_csv(
  file = "C:/Users/thyagu/rmit/applied analytics/Assign4/Assignment 4b-2.csv"
)

## Parsed with column specification:
## cols(
##   Gender = col_double(),
##   IQ = col_double(),
##   Profession = col_double(),
##   Advice = col_double(),
##   School = col_double(),
##   `Score after tutorial` = col_double(),
##   `Score before tutorial` = col_double()
## )

# Preview the first five rows.
head(tuteperf, n = 5)

## # A tibble: 5 x 7
##   Gender    IQ Profession Advice School `Score after tutori~ `Score before tuto~
##    <dbl> <dbl>      <dbl>  <dbl>  <dbl>                <dbl>               <dbl>
## 1      1    91          3      4      1                   42                  50
## 2      2   117          3      2      1                   38                  13
## 3      1   105          3      1      1                   43                  27
## 4      1   110          3      4      1                   37                  44
## 5      2   101          3      2      1                   35                  35

# Confirm the raw post-tutorial score column was read as numeric.
class(tuteperf[["Score after tutorial"]])

## [1] "numeric"

# Give both score columns short, syntactic names in a single rename() call.
tuteperf <- rename(
  tuteperf,
  score_after = `Score after tutorial`,
  score_before = `Score before tutorial`
)
class(tuteperf[["score_after"]])

## [1] "numeric"

class(tuteperf[["score_before"]])

## [1] "numeric"

# Count missing values in each score column.
sum(is.na(tuteperf[["score_after"]]))

## [1] 0

sum(is.na(tuteperf[["score_before"]]))

## [1] 0

# Paired difference for every row: after minus before.
tuteperf <- mutate(tuteperf, d = score_after - score_before)


# Descriptive statistics for the pre-tutorial scores: five-number summary,
# mean, standard deviation, row count and number of missing values.
summary_before <- summarise(
  tuteperf,
  Min = min(score_before, na.rm = TRUE),
  Q1 = quantile(score_before, probs = 0.25, na.rm = TRUE),
  Median = median(score_before, na.rm = TRUE),
  Q3 = quantile(score_before, probs = 0.75, na.rm = TRUE),
  Max = max(score_before, na.rm = TRUE),
  Mean = mean(score_before, na.rm = TRUE),
  SD = sd(score_before, na.rm = TRUE),
  n = n(),
  Missing = sum(is.na(score_before))
)

# Print the one-row summary table.
summary_before

## # A tibble: 1 x 9
##     Min    Q1 Median    Q3   Max  Mean    SD     n Missing
##   <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <int>   <int>
## 1    13    27     37    44    55  35.8  10.5  1290       0

# Descriptive statistics for the post-tutorial scores: five-number summary,
# mean, standard deviation, row count and number of missing values.
summary_after <- summarise(
  tuteperf,
  Min = min(score_after, na.rm = TRUE),
  Q1 = quantile(score_after, probs = 0.25, na.rm = TRUE),
  Median = median(score_after, na.rm = TRUE),
  Q3 = quantile(score_after, probs = 0.75, na.rm = TRUE),
  Max = max(score_after, na.rm = TRUE),
  Mean = mean(score_after, na.rm = TRUE),
  SD = sd(score_after, na.rm = TRUE),
  n = n(),
  Missing = sum(is.na(score_after))
)

# Print the one-row summary table.
summary_after

## # A tibble: 1 x 9
##     Min    Q1 Median    Q3   Max  Mean    SD     n Missing
##   <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <int>   <int>
## 1    33    37     41    44    55  41.2  4.95  1290       0

# Visualisation: profile plot of the paired scores. The matrix has two rows
# (before, after) and one column per row of `tuteperf`, so matplot() draws one
# point-and-line series per observation from x = 1 (before) to x = 2 (after).
matplot(
  rbind(tuteperf$score_before, tuteperf$score_after),
  type = "b",
  lty = 1,
  pch = 19,
  col = 1,
  xaxt = "n", # suppress the default numeric x axis; labelled manually below
  xlab = "Timing(Before or after)",
  ylab = "Score"
)
axis(side = 1, at = 1:2, labels = c("Before", "After"))

# Side-by-side boxplots of the scores before and after the tutorial.
# The group labels are passed through `names` so that boxplot() draws them
# itself. Adding them afterwards with axis() prints "Before"/"After" on top of
# the default "1"/"2" tick labels that boxplot() has already drawn.
boxplot(tuteperf$score_before,
  tuteperf$score_after,
  names = c("Before", "After"),
  ylab = "Scores",
  xlab = "Time"
)

# Boxplot of the paired differences stored in column `d`.
boxplot(tuteperf[["d"]])

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

# Check normality of the difference scores with a normal Q-Q plot.
# The argument name is spelled out in full (`distribution`) rather than relying
# on partial matching of `dist`.
qqPlot(tuteperf$d, distribution = "norm")

## [1]  8 83

# Descriptive statistics for the paired differences `d`: five-number summary,
# mean, standard deviation, row count and number of missing values.
summary_difference <- summarise(
  tuteperf,
  Min = min(d, na.rm = TRUE),
  Q1 = quantile(d, probs = 0.25, na.rm = TRUE),
  Median = median(d, na.rm = TRUE),
  Q3 = quantile(d, probs = 0.75, na.rm = TRUE),
  Max = max(d, na.rm = TRUE),
  Mean = mean(d, na.rm = TRUE),
  SD = sd(d, na.rm = TRUE),
  n = n(),
  Missing = sum(is.na(d))
)

# Print the one-row summary table.
summary_difference

## # A tibble: 1 x 9
##     Min    Q1 Median    Q3   Max  Mean    SD     n Missing
##   <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <int>   <int>
## 1   -14     0      0    14    31  5.35  10.0  1290       0

# Two-sided paired t-test: is the mean of (after - before) different from 0?
t.test(
  x = tuteperf$score_after,
  y = tuteperf$score_before,
  alternative = "two.sided",
  paired = TRUE,
  conf.level = 0.95
)

## 
##  Paired t-test
## 
## data:  tuteperf$score_after and tuteperf$score_before
## t = 19.144, df = 1289, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  4.802104 5.898671
## sample estimates:
## mean of the differences 
##                5.350388

# One-sided paired t-test: is the mean of (after - before) greater than 0?
t.test(
  x = tuteperf$score_after,
  y = tuteperf$score_before,
  alternative = "greater",
  paired = TRUE,
  conf.level = 0.95
)

## 
##  Paired t-test
## 
## data:  tuteperf$score_after and tuteperf$score_before
## t = 19.144, df = 1289, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  4.890355      Inf
## sample estimates:
## mean of the differences 
##                5.350388

KNIT4B

R Markdown

Including Plots