#' ---
#' title: "correlation"
#' author: "jagadish"
#' date: "November 9, 2015"
#' output: html_document
#' ---

library("MASS")
# Load the `cats` data set and inspect its structure.
data("cats")
str(cats)
## 'data.frame':    144 obs. of  3 variables:
##  $ Sex: Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Bwt: num  2 2 2 2.1 2.1 2.1 2.1 2.1 2.1 2.1 ...
##  $ Hwt: num  7 7.4 9.5 7.2 7.3 7.6 8.1 8.2 8.3 8.5 ...
# Per-column summary: counts for the factor, quartiles and mean for the
# numeric columns.
summary(cats)
##  Sex         Bwt             Hwt       
##  F:47   Min.   :2.000   Min.   : 6.30  
##  M:97   1st Qu.:2.300   1st Qu.: 8.95  
##         Median :2.700   Median :10.10  
##         Mean   :2.724   Mean   :10.63  
##         3rd Qu.:3.025   3rd Qu.:12.12  
##         Max.   :3.900   Max.   :20.50
# Titled scatterplot of heart weight against body weight, passing the two
# columns as x and y.
with(cats, plot(
  Bwt, Hwt,
  main = "Heart Weight (g) vs. Body Weight (kg)\nof Domestic Cats"
))

# The same scatterplot (untitled) requested through the formula interface.
plot(Hwt ~ Bwt, data = cats)

# Pearson correlation between body weight and heart weight.
cor(cats$Bwt, cats$Hwt)
## [1] 0.8041274
# Squared correlation coefficient.
cor(cats$Bwt, cats$Hwt)^2
## [1] 0.6466209
# Two-sided test of zero correlation with the default 95% confidence interval.
# Kept inside with() so the printed data label reads "Bwt and Hwt".
with(cats, cor.test(Bwt, Hwt))
## 
##  Pearson's product-moment correlation
## 
## data:  Bwt and Hwt
## t = 16.119, df = 142, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7375682 0.8552122
## sample estimates:
##       cor 
## 0.8041274
# One-sided test (correlation greater than 0) with an 80% confidence interval.
with(cats, cor.test(Bwt, Hwt, alternative = "greater", conf.level = 0.8))
## 
##  Pearson's product-moment correlation
## 
## data:  Bwt and Hwt
## t = 16.119, df = 142, p-value < 2.2e-16
## alternative hypothesis: true correlation is greater than 0
## 80 percent confidence interval:
##  0.7776141 1.0000000
## sample estimates:
##       cor 
## 0.8041274
# The same two-sided test requested through the formula interface.
cor.test(~ Bwt + Hwt, data = cats)
## 
##  Pearson's product-moment correlation
## 
## data:  Bwt and Hwt
## t = 16.119, df = 142, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7375682 0.8552122
## sample estimates:
##       cor 
## 0.8041274
# Formula interface again, restricted to the female cats via `subset`.
cor.test(~ Bwt + Hwt, data = cats, subset = Sex == "F")
## 
##  Pearson's product-moment correlation
## 
## data:  Bwt and Hwt
## t = 4.2152, df = 45, p-value = 0.0001186
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2890452 0.7106399
## sample estimates:
##       cor 
## 0.5320497
# Scatterplot of heart weight against body weight with the two sexes
# distinguished by colour and plotting symbol.
# Draw empty axes first (type = "n") so each sex can be layered on separately.
with(cats, plot(
  Bwt, Hwt,
  type = "n",
  xlab = "Body Weight in kg",
  ylab = "Heart Weight in g",
  main = "Heart Weight vs. Body Weight of Cats"
))
# Females: filled red circles.
with(cats, points(Bwt[Sex == "F"], Hwt[Sex == "F"], pch = 16, col = "red"))
# Males: filled blue triangles.
with(cats, points(Bwt[Sex == "M"], Hwt[Sex == "M"], pch = 17, col = "blue"))
# Key for the colour/symbol encoding; without it the reader cannot tell which
# group is which.
legend(
  "topleft",
  legend = c("Female", "Male"),
  pch = c(16, 17),
  col = c("red", "blue"),
  title = "Sex"
)

# Drop the workspace copy of `cats` before moving on to the next data set.
rm(cats)
# Load the `cement` data set and inspect its structure.
data(cement)
str(cement)
## 'data.frame':    13 obs. of  5 variables:
##  $ x1: int  7 1 11 11 7 11 3 1 2 21 ...
##  $ x2: int  26 29 56 31 52 55 71 31 54 47 ...
##  $ x3: int  6 15 8 8 6 9 17 22 18 4 ...
##  $ x4: int  60 52 20 47 33 22 6 44 22 26 ...
##  $ y : num  78.5 74.3 104.3 87.6 95.9 ...
# Pairwise correlation matrix of all five columns.
cor(cement)
##            x1         x2         x3         x4          y
## x1  1.0000000  0.2285795 -0.8241338 -0.2454451  0.7307175
## x2  0.2285795  1.0000000 -0.1392424 -0.9729550  0.8162526
## x3 -0.8241338 -0.1392424  1.0000000  0.0295370 -0.5346707
## x4 -0.2454451 -0.9729550  0.0295370  1.0000000 -0.8213050
## y   0.7307175  0.8162526 -0.5346707 -0.8213050  1.0000000
# Pairwise covariance matrix of the same columns.
cov(cement)
##           x1         x2         x3          x4          y
## x1  34.60256   20.92308 -31.051282  -24.166667   64.66346
## x2  20.92308  242.14103 -13.878205 -253.416667  191.07949
## x3 -31.05128  -13.87821  41.025641    3.166667  -51.51923
## x4 -24.16667 -253.41667   3.166667  280.166667 -206.80833
## y   64.66346  191.07949 -51.519231 -206.808333  226.31359
# Store the covariance matrix and rescale it to correlations; the printed
# result matches cor(cement) above.
cov.matr <- cov(cement)
cov2cor(cov.matr)
##            x1         x2         x3         x4          y
## x1  1.0000000  0.2285795 -0.8241338 -0.2454451  0.7307175
## x2  0.2285795  1.0000000 -0.1392424 -0.9729550  0.8162526
## x3 -0.8241338 -0.1392424  1.0000000  0.0295370 -0.5346707
## x4 -0.2454451 -0.9729550  0.0295370  1.0000000 -0.8213050
## y   0.7307175  0.8162526 -0.5346707 -0.8213050  1.0000000
# Scatterplot matrix of every pair of columns.
pairs(cement)