library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(infer)
library(dplyr)
library(EnvStats)
##
## Attaching package: 'EnvStats'
##
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
##
## The following object is masked from 'package:base':
##
## print.default
### This begins the Computational Problems
###Computational Problem 1
### Load the Problem 1 data, then plot the number of cards carrying a balance
### (Y) against the number of cards held (X).
Computational1 <- read.csv(file = "Computational1.csv")
with(
  Computational1,
  plot(
    x = X,
    y = Y,
    main = "Scatterplot",
    xlab = "Number of Credit Cards",
    ylab = "Number of Credit Cards with Balances"
  )
)

### Problem 1, part f - Compute the covariance
### The formula is at page 368 of the text. The sample covariance is the sum of
### the cross-products of the PAIRED deviations, divided by n - 1:
###   cov(x, y) = sum((x - mean(x)) * (y - mean(y))) / (n - 1)
### Note: multiplying the two sums of squared deviations together and dividing
### by n - 1 is NOT the covariance; the deviations must be multiplied pair by
### pair before summing.
cards_x <- c(5, 6, 4, 8, 2)
cards_y <- c(4, 1, 3, 7, 2)
cards_n <- length(cards_x)

### Means: 5 for x and 3.4 for y.
mean_x <- mean(cards_x)
mean_y <- mean(cards_y)

### Deviations of each observation from its own mean.
dev_x <- cards_x - mean_x # 0, 1, -1, 3, -3
dev_y <- cards_y - mean_y # 0.6, -2.4, -0.4, 3.6, -1.4

### Sums of squared deviations (needed for the variances, not the covariance):
### 0 + 1 + 1 + 9 + 9 = 20 for x; 0.36 + 5.76 + 0.16 + 12.96 + 1.96 = 21.2 for y.
ss_x <- sum(dev_x^2)
ss_y <- sum(dev_y^2)

### Cross-products: 0, -2.4, 0.4, 10.8, 4.2, which sum to 13.
cross_products <- dev_x * dev_y
cov_xy <- sum(cross_products) / (cards_n - 1)
cov_xy
## [1] 3.25

### Sanity checks: the hand computation must agree with the built-in cov(), and
### cov / (sd_x * sd_y) must reproduce the Pearson r of about 0.631.
stopifnot(isTRUE(all.equal(cov_xy, cov(cards_x, cards_y))))
cov_xy / sqrt((ss_x / (cards_n - 1)) * (ss_y / (cards_n - 1)))
## [1] 0.6313358
### Parts g & h to question 1.
### Why r = .63 is not statistically significant: yes, it is because there are
### only 5 observations. With df = n - 2 = 3 the two-tailed .05 critical t is
### about 3.18, which corresponds to needing |r| > ~.88; the observed t is 1.41.
### The very wide confidence interval (-0.57 to 0.97) says the same thing.
###
### cor.test() has no `use` argument (that belongs to cor()); anything passed
### as `use =` is silently ignored, and cor.test() drops incomplete pairs on its
### own. It is therefore omitted here.
cor.test(Computational1$X, Computational1$Y,
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: Computational1$X and Computational1$Y
## t = 1.41, df = 3, p-value = 0.2533
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5664430 0.9721233
## sample estimates:
## cor
## 0.6313358
### conf.level is only used for the Pearson interval, so it is not passed to the
### Spearman test.
cor.test(Computational1$X, Computational1$Y,
  method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: Computational1$X and Computational1$Y
## S = 12, p-value = 0.5167
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4
### Problem 3: This involves using the formula at page 371 of the text. The test
### statistic for H0: rho = 0 is
###   t = r * sqrt((n - 2) / (1 - r^2)),  with df = n - 2.
### The square-root term alone (6.614378) is not the statistic; it still has to
### be multiplied by r. This is the same t that cor.test() reports for a
### Pearson correlation.
p3_r <- 0.6
p3_n <- 30
p3_df <- p3_n - 2

### 1 - r^2 = 1 - .36 = .64, so the ratio under the root is 28 / .64 = 43.75.
p3_t <- p3_r * sqrt(p3_df / (1 - p3_r^2))
p3_t
## [1] 3.968627

### Two-tailed critical value at alpha = .05 with 28 df.
### NOTE(review): the value 2.46714 used in the original write-up equals
### qt(0.99, 28), i.e. one-tailed alpha = .01 (two-tailed .02) - confirm which
### alpha the problem asks for. The decision is the same either way.
p3_alpha <- 0.05
p3_crit <- qt(1 - p3_alpha / 2, df = p3_df)
p3_crit
## [1] 2.048407

### Since the test statistic exceeds the critical value, I reject the null
### hypothesis that the population Pearson correlation coefficient = 0.
abs(p3_t) > p3_crit
## [1] TRUE
### Problem 5.
### Read the coins/bills data and plot bills against coins.
Problem5 <- read.csv(file = "Problem5.csv")
with(
  Problem5,
  plot(
    x = Coins,
    y = Bills,
    main = "Scatterplot",
    xlab = "Number of Coins",
    ylab = "Number of Bills"
  )
)

### The points follow a curve rather than a straight line, so I would guess the
### correlation coefficient is close to zero.
### Problem 7.
### Pearson correlation between letters and words in the kindergarten data.
### `use =` is not a cor.test() argument (it is silently ignored), so it is not
### passed; cor.test() works on complete pairs by itself.
kindergarten <- read.csv("kindergarten.csv")
cor.test(kindergarten$Letters, kindergarten$Words,
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: kindergarten$Letters and kindergarten$Words
## t = 3.5344, df = 8, p-value = 0.007682
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2972960 0.9455644
## sample estimates:
## cor
## 0.780773
### With a correlation coefficient of .78, according to the text at page 380,
### the effect size is fairly strong. The correlation is also statistically
### significant (p = .0077 < .05).
### Problem 9.
### Kendall's tau between Detail and ThingsDone.
### - `use =` is not a cor.test() argument and `conf.level` is only used for
###   Pearson, so neither is passed.
### - The data contain ties, so an exact p-value cannot be computed;
###   exact = FALSE asks for the normal approximation directly instead of
###   getting it via the "Cannot compute exact p-value with ties" warning.
###   The statistic and p-value are unchanged.
adults <- read.csv("adults.csv")
cor.test(adults$Detail, adults$ThingsDone,
  method = "kendall",
  exact = FALSE
)
##
## Kendall's rank correlation tau
##
## data: adults$Detail and adults$ThingsDone
## z = 0.71464, p-value = 0.4748
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.2059715
### Given the result (tau of about .21, p = .47, not statistically significant),
### I would say it is at most a moderate effect. The text says (at p. 380) that
### .5 is a strong effect; I don't consider .5 a strong effect, and this is
### well short of .5 in any case.
### This begins the Interpretive Section
### Scatterplot and Pearson correlation of IQ against Self Concept.
msdlabs <- read.csv("msdlabs.csv")
with(
  msdlabs,
  plot(
    x = IQ,
    y = SELFCONCEPT,
    xlab = "IQ",
    ylab = "Self Concept",
    main = "Scatterplot"
  )
)

### `use =` is not a cor.test() argument (it is silently ignored), so it is not
### passed; cor.test() works on complete pairs by itself.
cor.test(msdlabs$IQ, msdlabs$SELFCONCEPT,
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: msdlabs$IQ and msdlabs$SELFCONCEPT
## t = 0.49515, df = 1554, p-value = 0.6206
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.03715767 0.06221481
## sample estimates:
## cor
## 0.01255958
### The scatterplot shows the same result as the correlation: there is no
### linear relationship between IQ and Self Concept. According to the text at
### page 380, a correlation of .1 is a weak effect; this correlation is 0.01,
### essentially no effect, and it is not statistically significant (p = .62)
### even with 1556 complete pairs (df = 1554). The scatterplot is a horizontal
### band with a wide dispersion of observations, confirming the lack of
### correlation between the 2 variables.