library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(infer)
library(dplyr)
library(EnvStats)
## 
## Attaching package: 'EnvStats'
## 
## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm
## 
## The following object is masked from 'package:base':
## 
##     print.default
### This begins the Computational Problems
### Computational Problem 1
Computational1 <- read.csv("Computational1.csv")
plot(Computational1$X, Computational1$Y,
     xlab = "Number of Credit Cards",
     ylab = "Number of Credit Cards with Balances",
     main = "Scatterplot")

### Problem 1, part f - Compute the covariance
### The formula is on page 368 of the text. The deviations of x and y from their means are
### computed below, and the answer I got by hand is 58.3; see the calculation below.
### Mean of x:
(5+6+4+8+2) /5
## [1] 5
### Mean of y:
(4+1+3+7+2) /5
## [1] 3.4
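### Deviations of each x value from the mean of 5: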
5-5
## [1] 0
6-5
## [1] 1
4-5
## [1] -1
8-5
## [1] 3
2-5
## [1] -3
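### Deviations of each y value from the mean of 3.4: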
4-3.4
## [1] 0.6
1-3.4
## [1] -2.4
3-3.4
## [1] -0.4
7-3.4
## [1] 3.6
2-3.4
## [1] -1.4
### Square the deviations: for X, I get a sum of 11;
### for Y, the squares are computed as follows:
.6^2
## [1] 0.36
(-2.4)^2
## [1] 5.76
(-.4)^2
## [1] 0.16
3.6^2
## [1] 12.96
(-1.4)^2
## [1] 1.96
.36+5.76+.16+12.96+1.96
## [1] 21.2
(11*21.2) /(5-1)
## [1] 58.3
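### As a cross-check on the hand arithmetic above (a quick sketch; this assumes Computational1.csv
### really has the columns X and Y used in the plot call), the sample covariance can be computed
### directly in R: it is the sum of the paired cross-products of deviations divided by n - 1,
### which is what cov() returns.
cov(Computational1$X, Computational1$Y)
### or, spelled out:
with(Computational1, sum((X - mean(X)) * (Y - mean(Y))) / (length(X) - 1))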
### Parts g & h of question 1. What I don't understand about the output is how I can have a
### correlation coefficient of .63 but not have statistical significance. Is it because there are
### only 5 observations? (The calculation after the Pearson output below looks at this.)
cor.test(Computational1$X, Computational1$Y,
         use = "everything",
         method = "pearson",
         conf.level = 0.95)
## 
##  Pearson's product-moment correlation
## 
## data:  Computational1$X and Computational1$Y
## t = 1.41, df = 3, p-value = 0.2533
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5664430  0.9721233
## sample estimates:
##       cor 
## 0.6313358
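### A quick sketch of why an r of .63 is not significant here: with only n = 5 observations the
### test has n - 2 = 3 degrees of freedom, so the t statistic behind the Pearson test is small and
### the critical value is large. Reconstructing the numbers from the output above:
0.6313358 * sqrt(3) / sqrt(1 - 0.6313358^2)   # standard t statistic for r; reproduces t = 1.41
qt(0.975, df = 3)                             # two-tailed .05 critical value, about 3.18
### The t statistic falls well short of the critical value, which is why the p-value is .25.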
cor.test(Computational1$X, Computational1$Y,
         use = "everything",
         method = "spearman",
         conf.level = 0.95)
## 
##  Spearman's rank correlation rho
## 
## data:  Computational1$X and Computational1$Y
## S = 12, p-value = 0.5167
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho 
## 0.4
### Problem 3: This involves the formula at page 371 of the text (the t test for a correlation,
### t = r * sqrt((n - 2) / (1 - r^2))). With a sample size of 30, I have n - 2 = 28 in the
### numerator under the square root. With an r value of .6, I have 1 - .36 in the denominator.
### I calculate as follows:

.6*.6
## [1] 0.36
28/(1-.36)
## [1] 43.75
sqrt(43.75)
## [1] 6.614378
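### The square root above is sqrt((n - 2) / (1 - r^2)); multiplying it by r = .6 gives the test
### statistic. A quick check (the qt() line assumes the 2.46714 critical value cited below is the
### .99 quantile of t with 28 degrees of freedom):
0.6 * sqrt(28 / (1 - 0.36))   # the test statistic, about 3.97
qt(0.99, df = 28)             # about 2.467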
### The test statistic is therefore about 3.97. The critical value is (I think) 2.46714. Since my
### test statistic is greater than that, I reject the null hypothesis that the population Pearson
### correlation coefficient = 0.
### Problem 5.
Problem5 <- read.csv("Problem5.csv")
plot(Problem5$Coins, Problem5$Bills,
     xlab = "Number of Coins",
     ylab = "Number of Bills",
     main = "Scatterplot")

### Given the curvilinear nature of the scatterplot, I would guess the correlation coefficient is close to zero.
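### A small toy example (not the Problem5 data) of why a curvilinear pattern can produce a Pearson
### correlation near zero: a perfectly symmetric quadratic relationship has an r of exactly 0.
x_demo <- -5:5
y_demo <- x_demo^2        # y is completely determined by x, but not linearly
cor(x_demo, y_demo)       # 0 for this symmetric case
### The actual coefficient for these data could be checked with cor(Problem5$Coins, Problem5$Bills).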
### Problem 7. 
kindergarten <- read.csv("kindergarten.csv")
cor.test(kindergarten$Letters, kindergarten$Words,
         use = "everything",
         method = "pearson",
         conf.level = 0.95)
## 
##  Pearson's product-moment correlation
## 
## data:  kindergarten$Letters and kindergarten$Words
## t = 3.5344, df = 8, p-value = 0.007682
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2972960 0.9455644
## sample estimates:
##      cor 
## 0.780773
### With a correlation coefficient of .78, according to the text at page 380, the effect size is fairly strong. 
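### As a supplementary effect-size check (a quick sketch using the r reported above), the squared
### correlation gives the proportion of shared variance between the two scores:
0.780773^2   # about 0.61, i.e., roughly 61% of the variance in one score is shared with the other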
### Problem 9.

adults <- read.csv("adults.csv")
cor.test(adults$Detail, adults$ThingsDone,
         use = "everything",
         method = "kendall",
         conf.level = 0.95)
## Warning in cor.test.default(adults$Detail, adults$ThingsDone, use =
## "everything", : Cannot compute exact p-value with ties
## 
##  Kendall's rank correlation tau
## 
## data:  adults$Detail and adults$ThingsDone
## z = 0.71464, p-value = 0.4748
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##       tau 
## 0.2059715
### Given the result, I would say this is a moderate effect, even though the text says (at p. 380)
### that .5 is a strong effect. I don't consider .5 a particularly strong effect, and this tau of
### about .21 does not come close to .5 in any case.
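### About the ties warning above: cor.test() cannot compute an exact p-value for Kendall's tau when
### there are tied ranks. Requesting the normal approximation explicitly with exact = FALSE avoids
### the warning and gives the same tau and approximate p-value (a sketch of the alternative call):
cor.test(adults$Detail, adults$ThingsDone,
         method = "kendall",
         exact = FALSE,
         conf.level = 0.95)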
### This begins the Interpretive Section

msdlabs <- read.csv("msdlabs.csv")
plot(msdlabs$IQ, msdlabs$SELFCONCEPT,
     xlab = "IQ",
     ylab = "Self Concept",
     main = "Scatterplot")

cor.test(msdlabs$IQ, msdlabs$SELFCONCEPT,
         use = "everything",
         method = "pearson",
         conf.level = 0.95)
## 
##  Pearson's product-moment correlation
## 
## data:  msdlabs$IQ and msdlabs$SELFCONCEPT
## t = 0.49515, df = 1554, p-value = 0.6206
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03715767  0.06221481
## sample estimates:
##        cor 
## 0.01255958
### The scatterplot shows the same result as the test: there is essentially no correlation between
### IQ and Self Concept. The correlation coefficient is nearly zero. According to the text at page
### 380, a correlation of .1 is a weak effect; this correlation is about .01, essentially no effect
### at all. The scatterplot is a roughly horizontal band with a wide dispersion of observations,
### confirming the lack of correlation between the two variables.
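### One more way to express how small this effect is (a quick sketch using the r reported above):
### the squared correlation is the proportion of shared variance.
0.01255958^2   # about 0.00016, i.e., IQ and Self Concept share well under 0.1% of their variance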