##-----------------Review Problems (Chapter 4)-------------------------##

##1. Listed below are measures of pain intensity before and 
##after using the proprietary drug Duragesic (based on data 
##from Janssen Pharmaceutical Products, L.P.) The data are 
##listed in order by row, and corresponding measures are from 
##the same subject before and after treatment. For example, 
##the first subject had a measure of 1.2 before treatment and 
##a measure of 0.4 after treatment. Each pair of measurements 
##is from one subject, and the intensity of pain was measured 
##using the standard visual analog score.

##data
## Pain-intensity scores for the same 31 subjects: element i of `bt`
## (before treatment) and element i of `at` (after treatment) belong to
## one subject, so the two vectors are paired by position.
bt <- c(
  1.2, 1.3, 1.5, 1.6, 8.0, 3.4, 3.5, 2.8, 2.6, 2.2, 3.0, 7.1, 2.3, 2.1,
  3.4, 6.4, 5.0, 4.2, 2.8, 3.9, 5.2, 6.9, 6.9, 5.0, 5.5, 6.0, 5.5, 8.6,
  9.4, 10.0, 7.6
)

at <- c(
  0.4, 1.4, 1.8, 2.9, 6.0, 1.4, 0.7, 3.9, 0.9, 1.8, 0.9, 9.3, 8.0, 6.8,
  2.3, 0.4, 0.7, 1.2, 4.5, 2.0, 1.6, 2.0, 2.0, 6.8, 6.6, 4.1, 4.6, 2.9,
  5.4, 4.8, 4.1
)

## Two-sided paired t-test on the (after - before) differences.
## `TRUE` is written out because `T` is an ordinary variable that can be
## reassigned, which would silently change the test being run.
t.test(at, bt, paired = TRUE, alternative = "two.sided")
## 
##  Paired t-test
## 
## data:  at and bt
## t = -2.6234, df = 30, p-value = 0.01355
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.4497022 -0.3051365
## sample estimates:
## mean of the differences 
##               -1.377419
##Since the p-value of 0.01355 is less than the 0.05 significance level, there 
##is a significant difference between the pain intensity before and after 
##Duragesic treatment. The mean difference (after - before) is negative, which 
##indicates that Duragesic is effective in reducing pain.

##Steven Schmidt (1994) conducted a series of experiments examining 
##the effects of humor on memory. In one study, participants were 
##given a mix of humorous and non-humorous sentences and significantly 
##more humorous sentences were recalled. However, Schmidt argued that 
##the humorous sentences were not necessarily easier to remember, 
##they were simply preferred when participants had a choice between 
##the two types of sentence. To test this argument, he switched to an
##independent-measures design in which one group got a set of 
##exclusively humorous sentences and another group got a set of 
##exclusively nonhumorous sentences. The following data are similar 
##to the results from the independent-measures study.

##Making a vector
## Recall scores, 16 values per group: `hum_sen` for the humorous-sentence
## group and `Nonhum_Sen` for the nonhumorous-sentence group.
hum_sen <- c(4, 5, 2, 4, 6, 7, 6, 6, 2, 5, 4, 3, 3, 3, 5, 5)
Nonhum_Sen <- c(6, 3, 5, 3, 3, 4, 2, 6, 4, 3, 4, 4, 5, 2, 6, 4)

##A. Determine if the data for both humorous and nonhumorous sentences
##are approximately normally distributed.

## Shapiro-Wilk test on all 32 scores pooled into a single vector.
## NOTE(review): this checks the combined sample, not each group on its own;
## the per-group tests follow below.
datasets <- c(hum_sen, Nonhum_Sen)
shapiro.test(datasets)
## 
##  Shapiro-Wilk normality test
## 
## data:  datasets
## W = 0.93143, p-value = 0.04293
## Shapiro-Wilk normality test for each group separately.
shapiro.test(hum_sen)
## 
##  Shapiro-Wilk normality test
## 
## data:  hum_sen
## W = 0.94622, p-value = 0.4323
shapiro.test(Nonhum_Sen)
## 
##  Shapiro-Wilk normality test
## 
## data:  Nonhum_Sen
## W = 0.91317, p-value = 0.1309

## Histograms of the raw scores, one per group, as a visual shape check.
hist(hum_sen)

hist(Nonhum_Sen)

## Normal Q-Q plot with reference line for the humorous group.
qqnorm(hum_sen, ylab = "Ordered response values",
       xlab = "Normal N(0,1) Order Statistic Medians",
       main = "Normal Probability Plot")
qqline(hum_sen)

## Matching Q-Q plot for the nonhumorous group, so that both groups named in
## the question get the same graphical check.
qqnorm(Nonhum_Sen, ylab = "Ordered response values",
       xlab = "Normal N(0,1) Order Statistic Medians",
       main = "Normal Probability Plot: Nonhumorous Sentences")
qqline(Nonhum_Sen)

##Since the p-values of 0.4323 (humorous) and 0.1309 (nonhumorous) are both 
##greater than the 0.05 significance level, both datasets are approximately normally distributed. 

## Shapiro-Wilk normality test for the humorous-sentence scores.
## NOTE(review): this repeats the shapiro.test(hum_sen) call made earlier in
## the script; the recorded output is the same.
shapiro.test(hum_sen)
## 
##  Shapiro-Wilk normality test
## 
## data:  hum_sen
## W = 0.94622, p-value = 0.4323
## Shapiro-Wilk normality test for the nonhumorous-sentence scores
## (also a repeat of the earlier call).
shapiro.test(Nonhum_Sen)
## 
##  Shapiro-Wilk normality test
## 
## data:  Nonhum_Sen
## W = 0.91317, p-value = 0.1309
##For the both humorous and nonhumorous sentences, the p-value is 
## 0.4323 (W=0.94622)  and 0.1309 (W=0.91317) which is greater than
## 0.05 hence we can conclude that the data is normally distributed

##B. Tests the hypothesis of equality of variance of the dependent 
## variable for groups defined by types of sentences.

library(car)
## Warning: package 'car' was built under R version 4.1.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()

## Wide table with one column of recall scores per sentence type.
## Both columns are named with `=`: an argument written as `name <- value`
## is unnamed as far as tibble() is concerned, so its column would be
## auto-named from the expression text instead of `Humorous_Sentences`.
sentences <- tibble(
  Humorous_Sentences = c(4, 5, 2, 4, 6, 7, 6, 6, 2, 5, 4, 3, 3, 3, 5, 5),
  Nonhumorous_Sentences = c(6, 3, 5, 3, 3, 4, 2, 6, 4, 3, 4, 4, 5, 2, 6, 4)
)

## Long format: `type` holds the source column name (the group) and
## `effects` holds the score.
sentences_longer <- sentences %>%
  pivot_longer(everything(), values_to = "effects", names_to = "type")

## Levene's test (centered on the mean) for equal variance of `effects`
## across the two sentence types. `type` is a character column, so
## leveneTest() coerces it to a factor and reports that with a warning.
leveneTest(effects ~ type, sentences_longer, center = "mean")
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  1     0.8 0.3782
##       30
##Since the p-value of 0.3782 is greater than the 0.05 level of significance, 
##we fail to reject the null hypothesis. The data indicate that the 
##variances are equal.

##The variances are equal, thus t-test: Two-Sample Assuming 
##Equal Variances (Equal Variances Assumed) will be employed.

##C. Do the results indicate a significant difference in the recall of
##humorous versus nonhumorous sentences? Use a two-tailed test with 
##alpha = 0.05

##independent t-test assuming equal variances

## Two-sided independent-samples t-test with pooled (equal) variances.
## The two groups are different participants (independent-measures design),
## so score i in one group has no link to score i in the other and a paired
## test is not appropriate. `var.equal = TRUE` follows from Levene's test
## above, which did not reject equality of variances.
t.test(hum_sen, Nonhum_Sen, var.equal = TRUE, alternative = "two.sided")
## Expected result, computed by hand from the two vectors (approximate;
## re-run to obtain the exact printed output):
##   group means 4.375 (humorous) and 4.000 (nonhumorous), difference 0.375
##   pooled variance about 1.992, standard error about 0.499
##   t is about 0.75 on df = 30, two-sided p-value about 0.46
##   95 percent confidence interval roughly (-0.64, 1.39)
## Since the p-value (about 0.46) is greater than 0.05, we fail to reject
## the null hypothesis: there is no significant difference in recall
## between humorous and nonhumorous sentences.

##Dataset 1 

## Read the Dataset 1 workbook into a tibble and preview its first rows.
## NOTE(review): the absolute Windows path is hard-coded and will likely only
## resolve on the original author's machine -- confirm the intended location.
dataset_1 <- read_excel(path = "C:/Users/user/Downloads/dataset1.xlsx")
head(x = dataset_1)
## # A tibble: 6 x 6
##   Student Strand   CAT Communication Science  Math
##     <dbl> <chr>  <dbl>         <dbl>   <dbl> <dbl>
## 1       1 HUMSS     52            54      50    42
## 2       2 HUMSS     51            56      50    42
## 3       3 HUMSS     42            62      36    24
## 4       4 HUMSS     52            64      52    36
## 5       5 HUMSS     48            62      42    30
## 6       6 HUMSS     49            60      42    28
##A. Construct a histogram for the data CAT, Communication, Science, and Math 

## For each score column (CAT, Communication, Science, Math): draw a base
## histogram, then run a one-sample Kolmogorov-Smirnov test against a normal
## distribution whose mean and sd are taken from that same column.
## NOTE(review): the reference mean/sd are estimated from the data being
## tested, and each call warns about ties; the reported p-values should be
## read with caution -- confirm whether shapiro.test() or a Lilliefors-type
## correction is preferred here.

## CAT: histogram and KS normality check.
hist(dataset_1$CAT)

ks.test(dataset_1$CAT,"pnorm", mean = mean(dataset_1$CAT), sd = sd(dataset_1$CAT))
## Warning in ks.test(dataset_1$CAT, "pnorm", mean = mean(dataset_1$CAT), sd =
## sd(dataset_1$CAT)): ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  dataset_1$CAT
## D = 0.097671, p-value = 0.7268
## alternative hypothesis: two-sided
## Communication: histogram and KS normality check.
hist(dataset_1$Communication)

ks.test(dataset_1$Communication,"pnorm", mean = mean(dataset_1$Communication), sd = sd(dataset_1$Communication))
## Warning in ks.test(dataset_1$Communication, "pnorm", mean =
## mean(dataset_1$Communication), : ties should not be present for the Kolmogorov-
## Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  dataset_1$Communication
## D = 0.097701, p-value = 0.7264
## alternative hypothesis: two-sided
## Science: histogram and KS normality check.
hist(dataset_1$Science)

ks.test(dataset_1$Science,"pnorm", mean = mean(dataset_1$Science), sd = sd(dataset_1$Science))
## Warning in ks.test(dataset_1$Science, "pnorm", mean = mean(dataset_1$Science), :
## ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  dataset_1$Science
## D = 0.085652, p-value = 0.8567
## alternative hypothesis: two-sided
## Math: histogram and KS normality check.
hist(dataset_1$Math)

ks.test(dataset_1$Math,"pnorm", mean = mean(dataset_1$Math), sd = sd(dataset_1$Math))
## Warning in ks.test(dataset_1$Math, "pnorm", mean = mean(dataset_1$Math), : ties
## should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  dataset_1$Math
## D = 0.17517, p-value = 0.09297
## alternative hypothesis: two-sided
## Reshape the score columns to long format: every column except `Student`
## and `Strand` becomes a (Subject, Score) pair, one row per student-subject.
histogram <- dataset_1 %>%
  pivot_longer(
    cols = -c(Student, Strand),
    values_to = "Score",
    names_to = "Subject"
  )

## Density-scaled histogram with a density curve overlaid, one panel per
## subject. `after_stat(density)` replaces the deprecated `..density..`
## notation; both map the bar height to the computed density.
histogram %>%
  ggplot(aes(x = Score, fill = Subject)) +
  geom_histogram(aes(y = after_stat(density)), bins = 20) +
  geom_density(alpha = .5, color = "darkgrey") +
  facet_wrap(~ Subject, ncol = 2)