library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(gt)

CH 56: 1, 2, 4, & 6

1) Practice: Make a frequency table and histogram for the following data. Then write a short description of the shape of the distribution in words.

Frequency Table

x <- c(11, 8, 9, 12, 9, 10, 12, 13, 11, 13, 12, 6, 10, 17, 13, 11, 12, 12, 14, 14)

d <- tibble(value = x) %>%
  mutate(z_score = (value - mean(value))/sd(value),
         p_rank = (percent_rank(value))) %>%
  add_count(value)

d_freq <- d %>%
  group_by(value) %>%
  select(value, n, z_score, p_rank) %>%
  arrange(value) %>%
  slice(1)

print(d_freq)
## # A tibble: 9 x 4
## # Groups:   value [9]
##   value     n z_score p_rank
##   <dbl> <int>   <dbl>  <dbl>
## 1     6     1  -2.26  0     
## 2     8     1  -1.43  0.0526
## 3     9     2  -1.01  0.105 
## 4    10     2  -0.600 0.211 
## 5    11     3  -0.186 0.316 
## 6    12     5   0.228 0.474 
## 7    13     3   0.641 0.737 
## 8    14     2   1.06  0.895 
## 9    17     1   2.30  1
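
As a cross-check, here is a minimal sketch that builds the same frequency table in one step with count(); it reuses the x vector defined above, and d_freq_check is just an illustrative name.

d_freq_check <- tibble(value = x) %>%
  count(value) %>%                    # one row per distinct value, with its frequency n
  mutate(prop = n / sum(n))           # relative frequency

print(d_freq_check)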

Histogram

ggplot(d, aes(x = value)) + 
  geom_histogram(binwidth=1) +
  scale_x_continuous(breaks=seq(0, 20, 1))

2) Practice: For the data in Exercise 1, compute the mean, median, mode, standard deviation, and range.

d <- tibble(value = x) %>%
  mutate(x_z = (value - mean(value))/sd(value),
         x_p_r = (percent_rank(value))) %>%
  add_count(value)

d_sum <- d %>%
  summarise(
    mean = mean(value),
    min = min(value),
    max = max(value),
    median = median(value),
    mode = 12,
    sd = sd(value)
  )

gt_d_sum <- d_sum %>%
  gt()
 
gt_d_sum 
 mean   min   max   median   mode       sd
11.45     6    17       12     12   2.4165
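
The mode above is hardcoded, and the range the exercise asks for is only given implicitly as min and max. A small sketch (reusing the x vector; mode_check is just an illustrative name) that derives both directly:

mode_check <- tibble(value = x) %>%
  count(value) %>%
  slice_max(n, n = 1) %>%   # keep the most frequent value
  pull(value)

mode_check            # 12, which occurs 5 times
max(x) - min(x)       # range = 17 - 6 = 11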

4) Practice: The following data represent scores on the Rosenberg Self-Esteem Scale for a sample of 10 Japanese university students and 10 American university students. (Although hypothetical, these data are consistent with empirical findings [Schmitt & Allik, 2005].) Compute the means and standard deviations of the two groups, make a bar graph, compute Cohen’s d, and describe the strength of the relationship in words.

Summary Statistics

Score <- c(25, 20, 24, 28, 30, 32, 21, 24, 20, 26)
Country <- "Japan"

d_J <- tibble(Country, Score) %>%
  mutate(row = row_number(),
         sd = sd(Score),
         sd_sq = sd * sd)

Score <- c(27, 30, 34, 37, 26, 24, 28, 35, 33, 36)
Country <- "US"

d_US <- tibble(Country, Score) %>%
  mutate(row = row_number(),
         sd = sd(Score),
         sd_sq = sd * sd)

d <- rbind(d_US, d_J)

# Pooled SD for two groups of equal size: sqrt((sd_J^2 + sd_US^2) / 2)
sum_sd_sq = var(d_J$Score) + var(d_US$Score)
mean_d = 25 - 31

d_sum <- d %>%
  group_by(Country) %>%
  summarise(
    mean = mean(Score),
    mean_d = mean_d,
    sd = sd(Score),
    upper_sd = mean + sd,
    lower_sd = mean - sd,
    sum_sd_sq = sum_sd_sq,
    half_sum_sd_sq = sum_sd_sq / 2,
    sd_pooled = sqrt(half_sum_sd_sq),
    cohen_d = mean_d / sd_pooled)

gt_d_sum <- d_sum %>%
  gt()

gt_d_sum
Country   mean   mean_d         sd   upper_sd   lower_sd   sum_sd_sq   half_sum_sd_sq   sd_pooled    cohen_d
Japan       25       -6   4.109609   29.10961   20.89039          38               19    4.358899   -1.376494
US          31       -6   4.594683   35.59468   26.40532          38               19    4.358899   -1.376494
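
As a cross-check, here is a short sketch (reusing d_J and d_US created above) that computes Cohen's d from first principles; the pooled SD also matches the residual standard error of the regression further down (about 4.36).

s_pooled <- sqrt((sd(d_J$Score)^2 + sd(d_US$Score)^2) / 2)   # ~4.36
(mean(d_J$Score) - mean(d_US$Score)) / s_pooled              # Cohen's d ~ -1.38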

Plot of mean scores by country, with individual data points overlaid. Error bars show ±1 SD.

plot <- ggplot(data = d_sum, aes(x = Country, y = mean)) +
  geom_col() +
  geom_point(data = d, aes(x = Country, y = Score), size = 3) +
  geom_errorbar(aes(ymin = lower_sd, ymax = upper_sd))
  
print(plot)

Linear regression of score on country

h1 <- lm(data = d, Score ~ Country)
summary(h1)
## 
## Call:
## lm(formula = Score ~ Country, data = d)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -7.00  -4.00  -0.50   3.25   7.00 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   25.000      1.378  18.137 5.17e-13 ***
## CountryUS      6.000      1.949   3.078  0.00648 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.359 on 18 degrees of freedom
## Multiple R-squared:  0.3448, Adjusted R-squared:  0.3084 
## F-statistic: 9.474 on 1 and 18 DF,  p-value: 0.006485
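
An equivalent way to test the same comparison (a sketch, not part of the original analysis) is an independent-samples t-test with equal variances assumed, which reproduces the regression's t and p for the Country coefficient:

t.test(Score ~ Country, data = d, var.equal = TRUE)   # |t| = 3.08, p = 0.006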

Interpretation:

Country is a statistically significant predictor of self-esteem, such that students in the US score an average of 6 points higher on the Rosenberg Self-Esteem Scale than students in Japan (b = 6.0, F[1, 18] = 9.47, p = .006, model R^2 = 0.34). The difference is about 1.4 pooled standard deviations (Cohen's d ≈ 1.38), which is conventionally a large effect.

6) Practice: In a classic study, men and women rated the importance of physical attractiveness in both a short-term mate and a long-term mate (Buss & Schmitt, 1993). The means and standard deviations are as follows. Men / Short Term: M = 5.67, SD = 2.34; Men / Long Term: M = 4.43, SD = 2.11; Women / Short Term: M = 5.67, SD = 2.48; Women / Long Term: M = 4.22, SD = 1.98. Present these results in writing, in a figure, and in a table.

Table of results

gender <- c("men", "men", "women", "women")
time <- c("short", "long", "short", "long")
mean <- c(5.67, 4.43, 5.67, 4.22)
sd <- c(2.34, 2.11, 2.48, 1.98)

d_sum = data.frame(gender, time, mean, sd) %>%
  mutate(upper_sd = mean + sd,
         lower_sd = mean - sd) %>%
  arrange(desc(time))

gt_d_sum <- d_sum %>%
  gt()

gt_d_sum
gender   time    mean     sd   upper_sd   lower_sd
men      short   5.67   2.34       8.01       3.33
women    short   5.67   2.48       8.15       3.19
men      long    4.43   2.11       6.54       2.32
women    long    4.22   1.98       6.20       2.24

Plot of results

plot <- ggplot(d_sum, aes(fill = time, y = mean, x = gender)) + 
  geom_col(position ="dodge2") +
  geom_errorbar(aes(ymin = lower_sd, ymax = upper_sd),
                position = "dodge2")
  
print(plot)

Interpretation:

Without an n, it is not possible to calculate a p-value or confidence interval. Looking at the means, there appears to be an effect of mate-search timeframe on the ratings, such that both men and women rate physical attractiveness as more important in a short-term mate than in a long-term mate. There does not appear to be an interaction between timeframe and gender.

Part 2: CH 61: 2, 5, 6, & 7

2) Practice: Use Table 13.1 to decide whether each of the following results is statistically significant.

2a) The correlation between two variables is r = −.78 based on a sample size of 137.

this <- "Strong relationship (r = -.78) and a large sample (N = 137) = statistically significant"
print(this)
## [1] "Strong relationship (r = -.78) and a large sample (N = 137) = statistically significant"

2b) The mean score on a psychological characteristic for women is 25 (SD = 5) and the mean score for men is 24 (SD = 5). There were 12 women and 10 men in this study.

that <- "Small n and a weak relationship (d = 0.2) = not statistically significant"
print(that)
## [1] "Small n and a weak relationship (d = 0.2) = not statistically significant"

2c) In a memory experiment, the mean number of items recalled by the 40 participants in Condition A was 0.50 standard deviations greater than the mean number recalled by the 40 participants in Condition B.

answer <- "Medium n (40 per condition) and a medium effect (d = 0.50) = statistically significant"
print(answer)
## [1] "Medium n (40 per condition) and a medium effect (d = 0.50) = statistically significant"

2d) In another memory experiment, the mean scores for participants in Condition A and Condition B came out exactly the same!

answer <- "No difference between the means = no relationship, so not statistically significant (regardless of n)"
print(answer)
## [1] "No difference between the means = no relationship, so not statistically significant (regardless of n)"

2e) A student finds a correlation of r = .04 between the number of units the students in his research methods class are taking and the students’ level of stress.

answer <- "Small n (a single class) and a very weak relationship (r = .04) = not statistically significant"
print(answer)
## [1] "Small n (a single class) and a very weak relationship (r = .04) = not statistically significant"

5) Practice: Decide whether each of the following Pearson’s r values is statistically significant for both a one-tailed and a two-tailed test.

Writing a function to calculate the t value for a correlation and report significance, using 1.96 (the large-sample two-tailed cutoff) as an approximation to the exact critical t with n − 2 degrees of freedom.

p_of_t <- function(r, n) {
  # t statistic for a correlation: t = r * sqrt(n - 2) / sqrt(1 - r^2)
  num <- r * sqrt(n - 2)
  denom <- sqrt(1 - r * r)
  t <- num / denom
  print("t-value:")
  print(t)
  # compare |t| to the cutoff so negative correlations are handled correctly
  if (abs(t) > 1.96) {
    print("p(t) <= 0.05")
  } else {
    print("p(t) >= 0.05")
  }
}

5a) The correlation between height and IQ is +.13 in a sample of 35.

p_of_t(0.13, 35)
## [1] "t-value:"
## [1] 0.7531847
## [1] "p(t) >= 0.05"

5b) For a sample of 88 university students, the correlation between how disgusted they felt and the harshness of their moral judgments was +.23.

p_of_t(0.23, 88)
## [1] "t-value:"
## [1] 2.19169
## [1] "p(t) <= 0.05"

5c) The correlation between the number of daily hassles and positive mood is −.43 for a sample of 30 middle-aged adults.

p_of_t(-0.43, 30)
## [1] "t-value:"
## [1] -2.520241
## [1] "p(t) >= 0.05"

6) Discussion: A researcher compares the effectiveness of two forms of psychotherapy for social phobia using an independent-samples t-test.

Explain what it would mean for the researcher to commit a Type I error.

Explain what it would mean for the researcher to commit a Type II error.

type1 <- "A Type I error means that the researcher incorrectly rejects the null hypothesis: concluding that one form of psychotherapy is more effective than the other when in truth there is no difference."

type2 <- "A Type II error means that the researcher incorrectly retains the null hypothesis: concluding that there is no difference between the two forms of psychotherapy when in truth one is more effective."

print(c(type1, type2))
## [1] "A Type I error means that the researcher incorrectly rejects the null hypothesis: concluding that one form of psychotherapy is more effective than the other when in truth there is no difference."
## [2] "A Type II error means that the researcher incorrectly retains the null hypothesis: concluding that there is no difference between the two forms of psychotherapy when in truth one is more effective."

7) Discussion: Imagine that you conduct a t-test and the p value is .02. How could you explain what this p value means to someone who is not already familiar with null hypothesis testing? Be sure to avoid the common misinterpretations of the p value.

answer <- "A p-value of 0.02 means that, if the null hypothesis were true, data showing a relationship at least as strong as the one observed would occur only about 2% of the time. Because that is unlikely, we reject the null hypothesis. The p-value is not the probability that the null hypothesis is true, nor the error rate: Sellke et al. (2001) estimate that a null hypothesis rejected at p = 0.05 still has roughly a 23%-50% chance of actually being true."

print(answer)
## [1] "A p-value of 0.02 means that, if the null hypothesis were true, data showing a relationship at least as strong as the one observed would occur only about 2% of the time. Because that is unlikely, we reject the null hypothesis. The p-value is not the probability that the null hypothesis is true, nor the error rate: Sellke et al. (2001) estimate that a null hypothesis rejected at p = 0.05 still has roughly a 23%-50% chance of actually being true."

Resources

Sellke, T., Bayarri, M. J., & Berger, J. O. (2001). Calibration of p values for testing precise null hypotheses. The American Statistician, 55(1). Retrieved from http://www.dcscience.net/Sellke-Bayarri-Berger-calibration-of-P-2001.pdf