WHAT IMPACT DOES BODY WEIGHT HAVE ON BRAIN WEIGHT?

Load data from URL

options(warn=-1)
require(knitr);
## Loading required package: knitr
weights <- read.table("http://people.sc.fsu.edu/~jburkardt/datasets/regression/x01.txt", skip = 32, header = TRUE, sep = "")
kable(head(weights));
Body Weight
3.385 44.5
0.480 15.5
1.350 8.1
465.000 423.0
36.330 119.5
27.660 115.0
names(weights);
## [1] "Body"   "Weight"

** Load require package.**

library(plyr);

Note the data columns name came with Body & Weight, but its actually “BrainWeight &”BodyWeight" in the data set, therefore, we need to make correction.

names(weights)[names(weights)=="Body"] <- "BrainWeight";
names(weights)[names(weights)=="Weight"] <- "BodyWeight";
str(weights);
## 'data.frame':    62 obs. of  2 variables:
##  $ BrainWeight: num  3.38 0.48 1.35 465 36.33 ...
##  $ BodyWeight : num  44.5 15.5 8.1 423 119.5 ...
names(weights);
## [1] "BrainWeight" "BodyWeight"
kable(head(weights));
BrainWeight BodyWeight
3.385 44.5
0.480 15.5
1.350 8.1
465.000 423.0
36.330 119.5
27.660 115.0

Lets checkout its histogram.

hist(weights$BrainWeight);

hist(weights$BodyWeight);

We can deduce that it a Very Rightly Skewed data, therefore, an intervention is required.let get the Natural logarithm of the data set.

weights_log <- cbind(weights, log(weights$BrainWeight), log(weights$BodyWeight));

kable(head(weights_log));
BrainWeight BodyWeight log(weights$BrainWeight) log(weights$BodyWeight)
3.385 44.5 1.2193539 3.795489
0.480 15.5 -0.7339692 2.740840
1.350 8.1 0.3001046 2.091864
465.000 423.0 6.1420374 6.047372
36.330 119.5 3.5926438 4.783316
27.660 115.0 3.3199873 4.744932
library(scatterplot3d);

attach(weights_log);
scatterplot3d(BrainWeight, BodyWeight, pch = 20, highlight.3d = TRUE, type = "h", main = "3D ScatterPlots"); 

weights_log[,c("BrainWeight","BodyWeight")] <- list(NULL);
colnames(weights_log);
## [1] "log(weights$BrainWeight)" "log(weights$BodyWeight)"
a <- plot(weights_log, ylab="Brain Weight",
   plot.type="double", col=1:2, xlab="Body Weight")
legend("topleft", legend=c("Brain Weight","Body Weight"),
  lty=1, col=c(1,2), cex=.8)
abline(a)

names(weights_log)[names(weights_log)=="log(weights$BrainWeight)"] <- "BrainWeights";
names(weights_log)[names(weights_log)=="log(weights$BodyWeight)"] <- "BodyWeight";

kable(head(weights_log));
BrainWeights BodyWeight
1.2193539 3.795489
-0.7339692 2.740840
0.3001046 2.091864
6.1420374 6.047372
3.5926438 4.783316
3.3199873 4.744932
cor(weights_log, use="complete.obs", method="kendall")
##              BrainWeights BodyWeight
## BrainWeights    1.0000000  0.8334657
## BodyWeight      0.8334657  1.0000000
x <- weights_log$BodyWeight;

hist(x, 
 xlim=c(min(x),max(x)), probability=T, 
   col='purple', xlab='Body Weight', ylab=' Frequency', axes=T,
   main='Natural Logarithm: Multi-modal')
lines(density(x,bw=1), col='red', lwd=2)

mode_1 <- table(as.vector(x));
names(mode_1)[mode_1 == max(mode_1)];
## [1] "0"                "2.50959926237837" "4.74493212836325"
y <- weights_log$BrainWeight;

hist(y, 
 xlim=c(min(y),max(y)), probability=T, 
   col='purple', xlab='Brain Weight', ylab=' Frequency', axes=T,
   main='Natural Logarithm: Bi-modal')
lines(density(y,bw=1), col='red', lwd=2)

kable(summary(weights_log));
BrainWeights BodyWeight
Min. :-5.2983 Min. :-1.966
1st Qu.:-0.5203 1st Qu.: 1.442
Median : 1.2066 Median : 2.848
Mean : 1.3375 Mean : 3.140
3rd Qu.: 3.8639 3rd Qu.: 5.111
Max. : 8.8030 Max. : 8.650
mode_2 <- table(as.vector(y));
names(mode_2)[mode_2 == max(mode_2)];
## [1] "-3.77226106305299" "1.25276296849537"
suppressMessages(library(forecast));

Acf(weights_log$BodyWeight, lag.max=NULL, type=c("correlation", "partial"), plot=TRUE, main=NULL, xlim=NULL, ylim=NULL, xlab="Lag", ylab=NULL, na.action=na.contiguous); 

taperedacf(weights_log$BodyWeight, lag.max=NULL, type=c("correlation", "partial"), plot=TRUE, calc.ci=TRUE, level=95, nsim=100, xlim=NULL, ylim=NULL, xlab="Lag", ylab=NULL); 

fcast <- forecast(weights_log$BodyWeight, h = ifelse(frequency(weights_log$BodyWeight) > 1, 2 * frequency(weights_log$BodyWeight), 10) , level=c(68, 95, 99.7), fan=FALSE, robust=FALSE, lambda=NULL, find.frequency=FALSE, allow.multiplicative.trend=FALSE);

fcast;
##    Point Forecast     Lo 68    Hi 68     Lo 95    Hi 95   Lo 99.7  Hi 99.7
## 63        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34220
## 64        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34220
## 65        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34221
## 66        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34221
## 67        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34221
## 68        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34221
## 69        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34221
## 70        3.14003 0.7266564 5.553403 -1.616456 7.896516 -4.062145 10.34221
## 71        3.14003 0.7266563 5.553403 -1.616456 7.896516 -4.062146 10.34221
## 72        3.14003 0.7266563 5.553403 -1.616456 7.896516 -4.062146 10.34221

CONCLUSION

References:

DATA SOURCE: http://people.sc.fsu.edu/~jburkardt/datasets/regression/x01.txt

http://r.789695.n4.nabble.com/converting-character-to-numeric-td3615259.html

http://stackoverflow.com/questions/4605206/drop-data-frame-columns-by-name

https://cran.r-project.org/web/packages/forecast/forecast.pdf