##-----------------Review Problems (Chapter 4)-------------------------##
##1. Listed below are measures of pain intensity before and
##after using the proprietary drug Duragesic (based on data
##from Janssen Pharmaceutical Products, L.P.) The data are
##listed in order by row, and corresponding measures are from
##the same subject before and after treatment. For example,
##the first subject had a measure of 1.2 before treatment and
##a measure of 0.4 after treatment. Each pair of measurements
##is from one subject, and the intensity of pain was measured
##using the standard visual analog score.
##data
## Pain intensity (visual analog score) for the same 31 subjects:
## bt = before treatment, at = after treatment. Position i in both
## vectors belongs to subject i, so the samples are paired.
bt <- c(
  1.2, 1.3, 1.5, 1.6, 8.0, 3.4, 3.5, 2.8, 2.6, 2.2, 3.0, 7.1, 2.3, 2.1,
  3.4, 6.4, 5.0, 4.2, 2.8, 3.9, 5.2, 6.9, 6.9, 5.0, 5.5, 6.0, 5.5, 8.6,
  9.4, 10.0, 7.6
)
at <- c(
  0.4, 1.4, 1.8, 2.9, 6.0, 1.4, 0.7, 3.9, 0.9, 1.8, 0.9, 9.3, 8.0, 6.8,
  2.3, 0.4, 0.7, 1.2, 4.5, 2.0, 1.6, 2.0, 2.0, 6.8, 6.6, 4.1, 4.6, 2.9,
  5.4, 4.8, 4.1
)
## Two-sided paired t-test on the per-subject differences (at - bt).
## TRUE is spelled out because T is an ordinary variable that can be
## reassigned, which would silently change the test being run.
t.test(at, bt, paired = TRUE, alternative = "two.sided")
##
## Paired t-test
##
## data: at and bt
## t = -2.6234, df = 30, p-value = 0.01355
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.4497022 -0.3051365
## sample estimates:
## mean of the differences
## -1.377419
##Since the p-value of 0.01355 is less than alpha = 0.05, there is a
##significant difference between the pain intensity before and after Duragesic
##treatment. Moreover, the mean difference (after - before) is negative
##(-1.377), which indicates that Duragesic is effective in reducing pain.
##Steven Schmidt (1994) conducted a series of experiments examining
##the effects of humor on memory. In one study, participants were
##given a mix of humorous and non-humorous sentences and significantly
##more humorous sentences were recalled. However, Schmidt argued that
##the humorous sentences were not necessarily easier to remember,
##they were simply preferred when participants had a choice between
##the two types of sentence. To test this argument, he switched to an
##independent-measures design in which one group got a set of
##exclusively humorous sentences and another group got a set of
##exclusively non-humorous sentences. The following data are similar
##to the results from the independent-measures study.
##Making a vector
## Recall scores for the two independent groups (16 values per group).
hum_sen <- c(
  4, 5, 2, 4, 6, 7, 6, 6,
  2, 5, 4, 3, 3, 3, 5, 5
)
Nonhum_Sen <- c(
  6, 3, 5, 3, 3, 4, 2, 6,
  4, 3, 4, 4, 5, 2, 6, 4
)
##A. Determine if the data for both humorous and nonhumorous sentences
##are approximately normally distributed.
## Shapiro-Wilk test on the pooled sample of all 32 scores. The written
## conclusion for part A relies on the per-group tests, not on this one.
datasets <- c(hum_sen, Nonhum_Sen)
shapiro.test(datasets)
##
## Shapiro-Wilk normality test
##
## data: datasets
## W = 0.93143, p-value = 0.04293
## Shapiro-Wilk normality test, run separately for each group.
shapiro.test(x = hum_sen)
##
## Shapiro-Wilk normality test
##
## data: hum_sen
## W = 0.94622, p-value = 0.4323
shapiro.test(x = Nonhum_Sen)
##
## Shapiro-Wilk normality test
##
## data: Nonhum_Sen
## W = 0.91317, p-value = 0.1309
## Histograms of the scores in each group.
hist(hum_sen)

hist(Nonhum_Sen)

## Normal Q-Q plots with reference lines, one per group. Part A asks about
## both groups, so the plot is drawn for Nonhum_Sen as well as hum_sen.
qqnorm(hum_sen,
  ylab = "Ordered response values",
  xlab = "Normal N(0,1) Order Statistic Medians",
  main = "Normal Probability Plot"
)
qqline(hum_sen)
qqnorm(Nonhum_Sen,
  ylab = "Ordered response values",
  xlab = "Normal N(0,1) Order Statistic Medians",
  main = "Normal Probability Plot (Nonhum_Sen)"
)
qqline(Nonhum_Sen)
##Since the p-values of 0.4323 and 0.1309 for the humorous and nonhumorous
##sentences are both greater than alpha = 0.05, both datasets are
##approximately normally distributed.
##For humorous sentences
## Same Shapiro-Wilk call as run earlier in this script for hum_sen.
shapiro.test(x = hum_sen)
##
## Shapiro-Wilk normality test
##
## data: hum_sen
## W = 0.94622, p-value = 0.4323
##For non-humorous sentences
shapiro.test(x = Nonhum_Sen)
##
## Shapiro-Wilk normality test
##
## data: Nonhum_Sen
## W = 0.91317, p-value = 0.1309
##For the humorous and nonhumorous sentences, the p-values are
## 0.4323 (W = 0.94622) and 0.1309 (W = 0.91317), respectively. Both are
## greater than 0.05; hence we can conclude that the data are approximately
## normally distributed.
##B. Tests the hypothesis of equality of variance of the dependent
## variable for groups defined by types of sentences.
library(car)
## Warning: package 'car' was built under R version 4.1.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()

## Two-column tibble, one column per sentence type. Columns are named with
## `=`: writing `<-` inside tibble() leaves the argument unnamed, so the
## column would be labelled with the deparsed assignment expression (and a
## stray global variable would be created) instead of "Humorous_Sentences".
sentences <- tibble(
  Humorous_Sentences = c(4, 5, 2, 4, 6, 7, 6, 6, 2, 5, 4, 3, 3, 3, 5, 5),
  Nonhumorous_Sentences = c(6, 3, 5, 3, 3, 4, 2, 6, 4, 3, 4, 4, 5, 2, 6, 4)
)
## Reshape to long format: `type` holds the column (group) name and
## `effects` holds the score.
sentences_longer <- sentences %>%
  pivot_longer(everything(), values_to = "effects", names_to = "type")
## Levene's test for homogeneity of variance between the two groups,
## centred on the group means (center = "mean").
leveneTest(effects ~ type, sentences_longer, center = "mean")
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = "mean")
## Df F value Pr(>F)
## group 1 0.8 0.3782
## 30
##Since the p-value of 0.3782 is greater than the 0.05 level of significance,
##we fail to reject the null hypothesis of equal variances. The data are
##consistent with equal variances in the two groups.
##Because the variances can be treated as equal, the t-test: Two-Sample
##Assuming Equal Variances (Equal Variances Assumed) will be employed.
##C. Do the results indicate a significant difference in the recall of
##humorous versus nonhumorous sentences? Use a two-tailed test with
##alpha = 0.05
##independent t-test assuming equal variances
## The humorous and nonhumorous groups contain different participants
## (independent-measures design), so the scores are not paired. Levene's
## test did not reject equal variances, hence the pooled-variance test
## (var.equal = TRUE).
t.test(hum_sen, Nonhum_Sen,
  paired = FALSE, var.equal = TRUE, alternative = "two.sided"
)
## Expected result, computed by hand from the data (re-run the call to
## regenerate the exact printout):
##
## Two Sample t-test
##
## data: hum_sen and Nonhum_Sen
## t = 0.7516, df = 30, p-value is approximately 0.46
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval: approximately -0.644 to 1.394
## sample estimates:
## mean of x = 4.375, mean of y = 4.000
## Since the p-value (about 0.46) is greater than 0.05, we fail to
## reject the null hypothesis; there is no significant difference in
## recall between the humorous and nonhumorous sentences.
##Dataset 1
## Read the Excel workbook into a tibble and preview its first rows.
## NOTE(review): the absolute path below is specific to one machine and
## will need adjusting before the script runs elsewhere.
dataset_1 <- read_excel(path = "C:/Users/user/Downloads/dataset1.xlsx")
head(x = dataset_1)
## # A tibble: 6 x 6
## Student Strand CAT Communication Science Math
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 HUMSS 52 54 50 42
## 2 2 HUMSS 51 56 50 42
## 3 3 HUMSS 42 62 36 24
## 4 4 HUMSS 52 64 52 36
## 5 5 HUMSS 48 62 42 30
## 6 6 HUMSS 49 60 42 28
##A. Construct a histogram for the data CAT, Communication, Science, and Math
## For each score column: draw a histogram, then run a one-sample
## Kolmogorov-Smirnov test against a normal distribution whose mean and sd
## are taken from that same column.
## NOTE(review): the ks.test documentation says the distribution parameters
## must be pre-specified rather than estimated from the data, and the test
## warns about ties, so these p-values are only a rough guide.
hist(dataset_1$CAT)

ks.test(
  dataset_1$CAT, "pnorm",
  mean = mean(dataset_1$CAT), sd = sd(dataset_1$CAT)
)
## Warning in ks.test(dataset_1$CAT, "pnorm", mean = mean(dataset_1$CAT), sd =
## sd(dataset_1$CAT)): ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: dataset_1$CAT
## D = 0.097671, p-value = 0.7268
## alternative hypothesis: two-sided
hist(dataset_1$Communication)

ks.test(
  dataset_1$Communication, "pnorm",
  mean = mean(dataset_1$Communication), sd = sd(dataset_1$Communication)
)
## Warning in ks.test(dataset_1$Communication, "pnorm", mean =
## mean(dataset_1$Communication), : ties should not be present for the Kolmogorov-
## Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: dataset_1$Communication
## D = 0.097701, p-value = 0.7264
## alternative hypothesis: two-sided
hist(dataset_1$Science)

ks.test(
  dataset_1$Science, "pnorm",
  mean = mean(dataset_1$Science), sd = sd(dataset_1$Science)
)
## Warning in ks.test(dataset_1$Science, "pnorm", mean = mean(dataset_1$Science), :
## ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: dataset_1$Science
## D = 0.085652, p-value = 0.8567
## alternative hypothesis: two-sided
hist(dataset_1$Math)

ks.test(
  dataset_1$Math, "pnorm",
  mean = mean(dataset_1$Math), sd = sd(dataset_1$Math)
)
## Warning in ks.test(dataset_1$Math, "pnorm", mean = mean(dataset_1$Math), : ties
## should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: dataset_1$Math
## D = 0.17517, p-value = 0.09297
## alternative hypothesis: two-sided
## Long format: every column except Student and Strand is stacked, with the
## original column name in `Subject` and its value in `Score`.
histogram <- dataset_1 %>%
  pivot_longer(-c(Student, Strand), values_to = "Score", names_to = "Subject")
## Density-scaled histogram with a density-curve overlay, one panel per
## subject. after_stat(density) replaces the deprecated ..density.. notation.
histogram %>%
  ggplot(aes(x = Score, fill = Subject)) +
  geom_histogram(aes(y = after_stat(density)), bins = 20) +
  geom_density(alpha = .5, color = "darkgrey") +
  facet_wrap(~Subject, ncol = 2)
