# Remove every object from the current environment before the analysis runs.
# NOTE(review): this wipes the caller's workspace when the script is sourced
# into an existing session; consider running in a fresh R session instead.
rm(list = ls())
library(tibble);library(tidytext);library(textdata);library(tidyverse);library(ggplot2);library(caret);library(gtsummary);library(ggthemes);library(stringr);library(reshape2);library(data.table);library(faraway);library(ellipse);library(MASS);library(jtools);library(kableExtra);library(psych)

Read in the data

# Load both example data sets by name in a single call.
data(list = c("rock", "prostate"))

a. An initial data analysis that explores the numerical and graphical characteristics of the data.

# Analysis data set: the lm() and boxcox() calls further down use data = df.
df <- rock
# tbl_summary(df)
# TODO: plot the data
# TODO: compute descriptive statistics

b. Variable selection to choose the best model

# With only three candidate predictors, automated feature selection adds
# little here; both searches are still run.

# Full model: perm regressed on every other column of df.
m1.full <- lm(formula = perm ~ ., data = df)

# AIC-based backward elimination starting from the full model.
m1 <- stepAIC(object = m1.full, direction = "backward", trace = FALSE)
# AIC-based stepwise search (direction = "both") from the same full model.
m2 <- stepAIC(object = m1.full, direction = "both", trace = FALSE)

# Summary for the backward selection model:
# summ(m1, digits = 4)

# Summary for the stepwise selection model:
# summ(m2, digits = 4)

c. An exploration of transformations to improve the fit of the model

# Box-Cox transformation of the response (perm).

# Profile the Box-Cox log-likelihood for the full model; bc$x holds the
# candidate lambda values and bc$y the matching log-likelihoods.
bc <- boxcox(perm ~ ., data = df)

# Lambda with the highest profile log-likelihood.
lambda <- bc$x[which.max(bc$y)]

# Refit the linear model on the Box-Cox transformed response.
# The power form divides by lambda, so it is undefined at lambda = 0; there the
# Box-Cox transform is log(perm) by definition, so fall back to that.
if (abs(lambda) < sqrt(.Machine$double.eps)) {
  m1.transformed <- lm(log(perm) ~ ., data = df)
} else {
  m1.transformed <- lm(((perm^lambda - 1) / lambda) ~ ., data = df)
}

# Summary of the transformed model.
summ(m1.transformed, digits = 4)
Observations 48
Dependent variable ((perm^lambda - 1)/lambda)
Type OLS linear regression
F(3,44) 52.0599
R² 0.7802
Adj. R² 0.7652
Est. S.E. t val. p
(Intercept) 11.5963 1.5529 7.4676 0.0000
area 0.0014 0.0002 5.6451 0.0000
peri -0.0046 0.0005 -9.2068 0.0000
shape 5.5907 4.9697 1.1250 0.2667
Standard errors: OLS
# From here you could repeat feature selection on the 3 predictors, now against the Box-Cox transformed response.

d. Diagnostics to check the assumptions of your model.

# Standard lm diagnostic panels: residuals vs fitted (1), normal Q-Q (2),
# scale-location (3) and residuals vs leverage (5).
plot(m1.transformed, which = c(1, 2, 3, 5))

e. Some predictions of future observations for interesting values of the predictors.

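# Not sure what this means -- presumably: choose a few area/peri/shape
# combinations of interest, pass them as newdata to predict() on the fitted
# model, and back-transform the predictions from the Box-Cox scale to perm.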

f. An interpretation of the meaning of the model with respect to the particular area of application.

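With the Box-Cox transformed model, we see that pore space area and perimeter (both in pixels) of each core sample act as significant predictors of rock permeability, measured in milli-Darcies. On the transformed scale, permeability increases with area and decreases with perimeter, while shape is not a significant predictor (p = 0.2667). The model has an adjusted R² of 0.7652.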