# Read the whitespace-delimited data file straight from its URL; header = TRUE
# takes the column names from the first line of the file.
d <- read.table(
  file = "https://personality-project.org/r/datasets/maps.mixx.epi.bfi.data",
  header = TRUE
)

# Put the columns of d on the search path: code further down refers to them
# by bare name (e.g. traitanx).
attach(d)

# Per-column summary statistics.
summary(d)
## epiE epiS epiImp epilie
## Min. : 1.0 Min. : 0.00 Min. :0.00 Min. :0.00
## 1st Qu.:11.0 1st Qu.: 6.00 1st Qu.:3.00 1st Qu.:1.00
## Median :14.0 Median : 8.00 Median :4.00 Median :2.00
## Mean :13.3 Mean : 7.58 Mean :4.37 Mean :2.38
## 3rd Qu.:16.0 3rd Qu.: 9.50 3rd Qu.:6.00 3rd Qu.:3.00
## Max. :22.0 Max. :13.00 Max. :9.00 Max. :7.00
## epiNeur bfagree bfcon bfext bfneur
## Min. : 0.0 Min. : 74 Min. : 53 Min. : 8.0 Min. : 34
## 1st Qu.: 7.0 1st Qu.:112 1st Qu.: 99 1st Qu.: 87.5 1st Qu.: 70
## Median :10.0 Median :126 Median :114 Median :104.0 Median : 90
## Mean :10.4 Mean :125 Mean :113 Mean :102.2 Mean : 88
## 3rd Qu.:14.0 3rd Qu.:136 3rd Qu.:128 3rd Qu.:118.0 3rd Qu.:104
## Max. :23.0 Max. :167 Max. :178 Max. :168.0 Max. :152
## bfopen bdi traitanx stateanx
## Min. : 73 Min. : 0.00 Min. :22 Min. :21.0
## 1st Qu.:110 1st Qu.: 3.00 1st Qu.:32 1st Qu.:32.0
## Median :125 Median : 6.00 Median :38 Median :38.0
## Mean :123 Mean : 6.78 Mean :39 Mean :39.9
## 3rd Qu.:136 3rd Qu.: 9.00 3rd Qu.:44 3rd Qu.:46.5
## Max. :173 Max. :27.00 Max. :71 Max. :79.0
1) How many variables are there and what are their names? There are 13 variables: epiE, epiS, epiImp, epilie, epiNeur, bfagree, bfcon, bfext, bfneur, bfopen, bdi, traitanx, stateanx
2) How many subjects are there and what are their names? There are 231 subjects, identified by assigned id numbers in the “row.names” attribute of the data frame:
# The subject identifiers are the row names of d, one per row.
print(row.names(d))
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11"
## [12] "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "22"
## [23] "23" "24" "25" "26" "27" "28" "29" "30" "31" "32" "33"
## [34] "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44"
## [45] "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55"
## [56] "56" "57" "58" "59" "60" "61" "62" "63" "64" "65" "66"
## [67] "67" "68" "69" "70" "71" "72" "73" "74" "75" "76" "77"
## [78] "78" "79" "80" "81" "82" "83" "84" "85" "86" "87" "88"
## [89] "89" "90" "91" "92" "93" "94" "95" "96" "97" "98" "99"
## [100] "100" "101" "102" "103" "104" "105" "106" "107" "108" "109" "110"
## [111] "111" "112" "113" "114" "115" "116" "117" "118" "119" "120" "121"
## [122] "122" "123" "124" "125" "126" "127" "128" "129" "130" "131" "132"
## [133] "133" "134" "135" "136" "137" "138" "139" "140" "141" "142" "143"
## [144] "144" "145" "146" "147" "148" "149" "150" "151" "152" "153" "154"
## [155] "155" "156" "157" "158" "159" "160" "161" "162" "163" "164" "165"
## [166] "166" "167" "168" "169" "170" "171" "172" "173" "174" "175" "176"
## [177] "177" "178" "179" "180" "181" "182" "183" "184" "185" "186" "187"
## [188] "188" "189" "190" "191" "192" "193" "194" "195" "196" "197" "198"
## [199] "199" "200" "201" "202" "203" "204" "205" "206" "207" "208" "209"
## [210] "210" "211" "212" "213" "214" "215" "216" "217" "218" "219" "220"
## [221] "221" "222" "223" "224" "225" "226" "227" "228" "229" "230" "231"
3) What is the mean ± 2 standard deviations for trait anxiety? Mean trait anxiety is 39.01 and the standard deviation is 9.52, so any score below 19.97 or above 58.05 lies more than two standard deviations from the mean.
4) Create a subset of the data that excludes all participants outside of two standard deviations from the mean for trait anxiety. Note: Later plots will use the trimmed subset (dTrim), not the original scores.
# Trim trait-anxiety outliers and compare the distribution before and after.
# The scores are read from d explicitly rather than through the search path,
# so the cutoffs and the subset always come from the original data, whichever
# data frame happens to be attached.

# Lower and upper cutoff: mean -/+ 2 standard deviations of the original scores.
cutoffs <- mean(d$traitanx) + c(-2, 2) * sd(d$traitanx)

# Keep the rows whose trait anxiety lies within the cutoffs (bounds included).
dTrim <- d[d$traitanx >= cutoffs[1] & d$traitanx <= cutoffs[2], ]

# Two panels side by side.
par(mfcol = c(1, 2))

# Left panel: original scores, with a long-dashed red vertical line at each
# cutoff (x1 defaults to x0, so each segment runs straight up from 0 to 60).
hist(d$traitanx, main = "Trait Anxiety (Original)", xlab = "Score",
  xlim = c(0, 80), ylim = c(0, 60))
segments(cutoffs, c(0, 0), y1 = c(60, 60), col = "red", lty = 5, lwd = 2.5)
text(75, 45, "> 2 SD", col = "red")
# Horizontal arrows at y = 40 pointing outward: 15 -> 0 and 60 -> 75.
arrows(c(15, 60), c(40, 40), c(0, 75), col = "red", length = 0.1, lwd = 2)

# Right panel: trimmed scores, drawn entirely in red.
hist(dTrim$traitanx, main = "Trait Anxiety (Trimmed)", xlab = "Score",
  xlim = c(0, 70), ylim = c(0, 60), fg = "red", border = "red",
  col.axis = "red", col.lab = "red", col.main = "red")

# From here on, bare column names resolve to the trimmed data: the original
# frame is taken off the search path and the trimmed one is put on it.
detach(d)
attach(dTrim)
State anxiety as a function of trait anxiety (trimmed data), with regression line, formula and correlation
# Scatterplot of state anxiety against trait anxiety for the trimmed sample,
# annotated with the least-squares line, its equation and the correlation.
# dTrim is named explicitly in every call so the data source is visible here
# and does not depend on which data frame is currently attached.

# Back to a single panel.
par(mfcol = c(1, 1))
plot(stateanx ~ traitanx, data = dTrim,
  main = "State Anxiety as a Function of Trait Anxiety",
  xlab = "Trait Anxiety", ylab = "State Anxiety",
  xlim = c(0, 80), ylim = c(0, 80), pch = 20, cex = 0.5)

# Linear fit, drawn over the points.
StTr <- lm(stateanx ~ traitanx, data = dTrim)
abline(StTr, col = "red")

# Legend text: slope first, then intercept, each rounded to 2 decimals.
# Coefficients are picked by name rather than by position.
fStTr <- paste0(
  "State = ", round(coef(StTr)[["traitanx"]], digits = 2),
  "Trait + ", round(coef(StTr)[["(Intercept)"]], digits = 2)
)
corStTr <- paste("r =", round(cor(dTrim$stateanx, dTrim$traitanx), digits = 2))
legend("topleft", c(fStTr, corStTr), bty = "n", text.col = "red")
Trait anxiety as a function of neuroticism (trimmed data), with regression line, formula and correlation
# Scatterplot of trait anxiety against Big-Five neuroticism for the trimmed
# sample, annotated with the least-squares line, its equation and the
# correlation. dTrim is named explicitly in every call so the data source is
# visible here and does not depend on which data frame is currently attached.
plot(traitanx ~ bfneur, data = dTrim,
  main = "Trait Anxiety as a Function of Neuroticism",
  xlab = "Neuroticism (Big-Five Personality)", ylab = "Trait Anxiety",
  xlim = c(0, 160), ylim = c(0, 80), pch = 20, cex = 0.5)

# Linear fit, drawn over the points.
AnxNeur <- lm(traitanx ~ bfneur, data = dTrim)
abline(AnxNeur, col = "red")

# Legend text: slope first, then intercept, each rounded to 2 decimals.
# Coefficients are picked by name rather than by position.
fAnxNeur <- paste0(
  "TrAnx= ", round(coef(AnxNeur)[["bfneur"]], digits = 2),
  "Neur + ", round(coef(AnxNeur)[["(Intercept)"]], digits = 2)
)
corAnxNeur <- paste("r =", round(cor(dTrim$traitanx, dTrim$bfneur), digits = 2))
legend("topleft", c(fAnxNeur, corAnxNeur), bty = "n", text.col = "red")