#the course url has a very detailed explanation of
#correlation analysis
#https://onlinecourses.science.psu.edu/stat501/node/284

#read the IQ / body-size data directly from the course site with read.table;
#header = TRUE tells R that the first row of the file holds the column names
iqSize <- read.table(
  "https://onlinecourses.science.psu.edu/stat501/sites/onlinecourses.science.psu.edu.stat501/files/data/iqsize.txt",
  header = TRUE
)

#look at the first three rows of the data frame
#this is an American dataset, so the participants' weights are in pounds, not kilograms!
head(iqSize, n = 3)
##   PIQ  Brain Height Weight
## 1 124  81.69   64.5    118
## 2 150 103.84   73.3    143
## 3 128  96.54   68.8    172
#show the structure of the dataset: number of rows, column names and column types
str(iqSize)
## 'data.frame':    38 obs. of  4 variables:
##  $ PIQ   : int  124 150 128 134 110 131 98 84 147 124 ...
##  $ Brain : num  81.7 103.8 96.5 95.2 92.9 ...
##  $ Height: num  64.5 73.3 68.8 65 69 64.5 66 66.3 68.8 64.5 ...
##  $ Weight: int  118 143 172 147 146 138 175 134 172 118 ...
#summary gives a per-column run down of the dataset
#(minimum, quartiles, median, mean and maximum of every variable)
summary(iqSize)
##       PIQ             Brain            Height          Weight     
##  Min.   : 72.00   Min.   : 79.06   Min.   :62.00   Min.   :106.0  
##  1st Qu.: 89.25   1st Qu.: 85.48   1st Qu.:66.00   1st Qu.:135.2  
##  Median :115.00   Median : 90.54   Median :68.00   Median :146.5  
##  Mean   :111.34   Mean   : 90.68   Mean   :68.42   Mean   :151.1  
##  3rd Qu.:128.00   3rd Qu.: 94.95   3rd Qu.:70.38   3rd Qu.:172.0  
##  Max.   :150.00   Max.   :107.95   Max.   :77.00   Max.   :192.0
#calling plot on the whole data frame draws every variable against every other
#one, so the relationships among all variables can be seen in a single image
#height and weight look positively correlated:
#see the panel in the 4th column, 3rd row from the top
plot(iqSize)

#compute the pairwise correlation matrix across all columns of the data frame
#(Pearson is the default method of cor; it is only spelled out here for clarity)
corr <- cor(iqSize, method = "pearson")

#print the matrix to inspect it
corr
##                 PIQ     Brain      Height      Weight
## PIQ     1.000000000 0.3778155 -0.09315559 0.002512154
## Brain   0.377815463 1.0000000  0.58836684 0.513486971
## Height -0.093155590 0.5883668  1.00000000 0.699614004
## Weight  0.002512154 0.5134870  0.69961400 1.000000000
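#install ggcorrplot if needed (the released version is available on CRAN)
#install.packages("ggcorrplot")
#or install the development version from GitHub:
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/ggcorrplot")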

#load visualization libraries
library(ggplot2)

library(ggcorrplot)

#basic visual of the correlation matrix
ggcorrplot(corr)

#lower triangle only, with the correlation coefficients printed in the cells
#and the variables reordered using hierarchical clustering
ggcorrplot(corr, type = "lower", hc.order = TRUE, lab = TRUE)

#same lower-triangle view with labels, drawn with circles instead
ggcorrplot(corr, method = "circle", type = "lower", lab = TRUE)

#You can see a moderate to strong correlation between height and weight.
#Now you have your correlation matrix with the corresponding correlation coefficients for easy visualization


#if you want to continue the example on the Stats 501 page (https://onlinecourses.science.psu.edu/stat501/node/284) and get your regression equation, residuals, and R-squared,
#use the lm function to run your regression analysis in R, similar to the example
#shown using Minitab Statistical Software

#multiple linear regression: PIQ modelled on Brain, Height and Weight
fit <- lm(
  formula = PIQ ~ Brain + Height + Weight,
  data = iqSize
)

#model summary: residual quantiles, coefficient table, R-squared and F-statistic
summary(fit)
## 
## Call:
## lm(formula = PIQ ~ Brain + Height + Weight, data = iqSize)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -32.74 -12.09  -3.84  14.17  51.69 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.114e+02  6.297e+01   1.768 0.085979 .  
## Brain        2.060e+00  5.634e-01   3.657 0.000856 ***
## Height      -2.732e+00  1.229e+00  -2.222 0.033034 *  
## Weight       5.599e-04  1.971e-01   0.003 0.997750    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.79 on 34 degrees of freedom
## Multiple R-squared:  0.2949, Adjusted R-squared:  0.2327 
## F-statistic: 4.741 on 3 and 34 DF,  p-value: 0.007215
#variance-covariance matrix of the estimated coefficients
vcov(fit)
##             (Intercept)       Brain      Height      Weight
## (Intercept) 3965.358990 -8.11822511 -58.9794977  5.40551851
## Brain         -8.118225  0.31747232  -0.2588663 -0.01957544
## Height       -58.979498 -0.25886634   1.5114948 -0.13879702
## Weight         5.405519 -0.01957544  -0.1387970  0.03883506