## -- Attaching packages ----------------------------------------------------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0

## -- Conflicts -------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()

Organize Data

#CARROT Data
carrT1 <- read.csv("C:/Users/mmatt/Desktop/Projects/TADS/Data/T1/t1_carrot_c.csv")
#Get difference of cards sorted in reward condition
carrT1$cDif <- carrT1$cct3 - carrT1$cct2
mean(carrT1$cDif,na.rm=TRUE)

## [1] 1.977578

#Puberty Data
pdsT1 <- read.csv("C:/Users/mmatt/Desktop/Projects/TADS/Data/T1/t1_pds_c.csv")
table(pdsT1$chsex)

## 
##  1  2 
## 95 67

#merge cDif and sex 
df <- merge(carrT1[c('id','cDif')], pdsT1[c('id','chsex')], by = 'id')

Unstandardized Regression

cards_on_sex <- lm(cDif ~ chsex, df)
summary(cards_on_sex)

## 
## Call:
## lm(formula = cDif ~ chsex, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.3736  -2.3736  -0.8413   3.5095  20.6264 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   2.9060     1.6159   1.798   0.0741 .
## chsex        -0.5324     1.0827  -0.492   0.6237  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.606 on 152 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.001588,   Adjusted R-squared:  -0.004981 
## F-statistic: 0.2417 on 1 and 152 DF,  p-value: 0.6237

#Check mean of residuals approximates 0
mean(cards_on_sex$residuals)

## [1] -1.40017e-16

#Check assumptions
check_model(cards_on_sex)

For the Unstandardized regression, the mean of residuals does approximate zero. The normality of residuals slightly resembles a normal distribution, though it is leptokurtic. Also, there is not homogeneity of variance. Being female predicted having a smaller difference of cards sorted in reward vs. non-reward condition relative to being male (Beta = -0.52, p = .62, R^2 = .002)

Standardized Regression

#Standardize sex and cDif
df <- mutate(df, zcDif = ((cDif - mean(cDif, na.rm = TRUE)) / sd(cDif, na.rm = TRUE)),
                         zSex = ((chsex - mean(chsex, na.rm = TRUE)) / sd(chsex, na.rm = TRUE)))
#Check standardization
psych::describe(df[4:5])

##       vars   n mean sd median trimmed  mad   min  max range skew kurtosis   se
## zcDif    1 186    0  1  -0.14   -0.01 0.68 -2.58 3.21  5.80 0.26     1.10 0.07
## zSex     2 155    0  1  -0.82   -0.05 0.00 -0.82 1.20  2.03 0.38    -1.87 0.08

#Standardized regression
std_c_on_s <- lm(zcDif ~ zSex, df)
summary(std_c_on_s)

## 
## Call:
## lm(formula = zcDif ~ zSex, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6511 -0.3622 -0.1284  0.5355  3.1474 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.03384    0.08123   0.417    0.678
## zSex        -0.04003    0.08141  -0.492    0.624
## 
## Residual standard error: 1.008 on 152 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.001588,   Adjusted R-squared:  -0.004981 
## F-statistic: 0.2417 on 1 and 152 DF,  p-value: 0.6237

mean(std_c_on_s$residuals)

## [1] 7.406368e-18

#Check assumptions
check_model(std_c_on_s)

Standardization did not appear to substantially alter assumptions of the regression.

Remove Outliers; Standardized Regression

#Check for influencing observations
CoS_check <- gvlma(std_c_on_s, alphalevel = 0.000001)
deletion.gvlma(CoS_check)

## 
## Global test deletion statistics.
## 
## Linear Model:
##  lm(formula = zcDif ~ zSex, data = df) 
## 
## gvlma call:
##  gvlma(x = std_c_on_s, alphalevel = 1e-06) 
## 
## 
## Summary values:
##                          Min.       1st Qu.        Median          Mean
## DeltaGlobalStat -2.739476e+01 -4.131114e+00 -1.928080e+00 -3.494878e-01
## GStatpvalue      5.431028e-03  1.345549e-02  1.502547e-02  1.460487e-02
## DeltaStat1      -8.088605e+01 -5.469839e+00  0.000000e+00  5.863813e-01
## Stat1pvalue      7.590465e-02  1.911187e-01  2.001505e-01  2.030745e-01
## DeltaStat2      -2.032737e+01 -4.186064e+00 -2.016283e+00 -5.106629e-01
## Stat2pvalue      3.857236e-04  1.030071e-03  1.158422e-03  1.118822e-03
## DeltaStat3      -9.955385e+04 -4.558196e+02 -1.001258e+02 -1.106231e+03
## Stat3pvalue      9.999997e-01  1.000000e+00  1.000000e+00  1.000000e+00
## DeltaStat4      -9.935229e+01 -1.385804e+01  0.000000e+00  8.741940e-01
## Stat4pvalue      5.818378e-01  6.635525e-01  6.858036e-01  6.897019e-01
##                      3rd Qu.         Max.
## DeltaGlobalStat 1.083903e-01 1.665506e+01
## GStatpvalue     1.692472e-02 5.788250e-02
## DeltaStat1      4.124836e+00 9.195308e+01
## Stat1pvalue     2.129140e-01 5.754108e-01
## DeltaStat2      0.000000e+00 1.696316e+01
## Stat2pvalue     1.314704e-03 3.393330e-03
## DeltaStat3      1.382486e+02 8.883238e+03
## Stat3pvalue     1.000000e+00 1.000000e+00
## DeltaStat4      1.562460e+01 8.530012e+01
## Stat4pvalue     7.073061e-01 9.740264e-01
## 
## 
## Unusual observations for Global Stat:
##     Delta Global Stat (%)  Global Stat p-value
## 20               13.29795          0.006537792
## 33               14.29156          0.006189002
## 38              -26.72254          0.055907961
## 63               13.76325          0.006372122
## 78              -27.39476          0.057882496
## 79               12.86895          0.006694275
## 107              16.65506          0.005431028
## 108              13.12984          0.006598678
## 116              14.24853          0.006203722
## 125              14.60652          0.006082302

#Find IDs of outliers
outlier.info <- influence.measures(std_c_on_s)
rowid <- data.frame(seq(1,154,1))
outlier.info_df <- cbind(data.frame(outlier.info$infmat), data.frame(outlier.info$is.inf), rowid)
outlier.info_sum <- summary(influence.measures(std_c_on_s))

## Potentially influential observations of
##   lm(formula = zcDif ~ zSex, data = df) :
## 
##     dfb.1_ dfb.zSex dffit   cov.r   cook.d hat  
## 3   -0.19   0.16    -0.25    0.95_*  0.03   0.01
## 27   0.18  -0.15     0.24    0.96_*  0.03   0.01
## 33  -0.21  -0.26    -0.33    0.94_*  0.05   0.02
## 38   0.26   0.31     0.40_*  0.91_*  0.08   0.02
## 51  -0.22   0.18    -0.28    0.93_*  0.04   0.01
## 78   0.26  -0.22     0.34    0.90_*  0.05   0.01
## 116 -0.19   0.16    -0.25    0.95_*  0.03   0.01
## 184  0.25  -0.21     0.32    0.91_*  0.05   0.01

#Estimating the model without outliers
std_c_on_s_r <- lm(zcDif ~ zSex, std_c_on_s$model[-c(3, 27, 33, 38, 51, 78, 116, 184), ])
plot(std_c_on_s_r)

coefficients(summary(std_c_on_s_r))

##                Estimate Std. Error    t value  Pr(>|t|)
## (Intercept)  0.04744734 0.07509991  0.6317895 0.5285192
## zSex        -0.06711088 0.07529339 -0.8913250 0.3742315

CoS_check_r <- gvlma(std_c_on_s_r, alphalevel = 0.0001)
deletion.gvlma(CoS_check_r)

## 
## Global test deletion statistics.
## 
## Linear Model:
##  lm(formula = zcDif ~ zSex, data = std_c_on_s$model[-c(3, 27,      33, 38, 51, 78, 116, 184), ]) 
## 
## gvlma call:
##  gvlma(x = std_c_on_s_r, alphalevel = 1e-04) 
## 
## 
## Summary values:
##                          Min.       1st Qu.        Median          Mean
## DeltaGlobalStat -4.449556e+01 -3.750682e+00  -2.179607566  -0.511544010
## GStatpvalue      6.998844e-04  2.406153e-03   0.003332498   0.003706182
## DeltaStat1      -7.380414e+01 -3.778968e+00  -2.622280690  -0.307510070
## Stat1pvalue      2.128140e-02  6.870124e-02   0.077314977   0.076933720
## DeltaStat2      -4.645805e+01 -4.461875e+00  -4.080078620  -0.618996575
## Stat2pvalue      4.569142e-04  1.263614e-03   0.001749817   0.001649880
## DeltaStat3      -1.746712e+03 -2.380280e+02 -56.718108142 499.786870857
## Stat3pvalue      9.999997e-01  9.999999e-01   0.999999987   0.999999970
## DeltaStat4      -8.050263e+01 -5.986761e+00   0.033399419  -0.348091853
## Stat4pvalue      5.821549e-02  8.925806e-02   0.099475990   0.102835000
##                      3rd Qu.         Max.
## DeltaGlobalStat 2.359864e+00 1.938708e+01
## GStatpvalue     3.728734e-03 6.229961e-02
## DeltaStat1      3.410590e+00 6.550526e+01
## Stat1pvalue     7.909534e-02 3.595559e-01
## DeltaStat2      1.794675e+00 2.029113e+01
## Stat2pvalue     1.787318e-03 1.937283e-02
## DeltaStat3      6.027247e+02 2.371497e+04
## Stat3pvalue     1.000000e+00 1.000000e+00
## DeltaStat4      6.438799e+00 3.223307e+01
## Stat4pvalue     1.102526e-01 4.670414e-01
## 
## 
## Unusual observations for Global Stat:
##     Delta Global Stat (%)  Global Stat p-value
## 67              -38.79197           0.04263333
## 144             -44.49556           0.06229961

#Check Assumptions
check_model(std_c_on_s_r)

gvlma::gvlma(std_c_on_s_r)

## 
## Call:
## lm(formula = zcDif ~ zSex, data = std_c_on_s$model[-c(3, 27, 
##     33, 38, 51, 78, 116, 184), ])
## 
## Coefficients:
## (Intercept)         zSex  
##     0.04745     -0.06711  
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma::gvlma(x = std_c_on_s_r) 
## 
##                        Value  p-value                   Decision
## Global Stat        1.613e+01 0.002851 Assumptions NOT satisfied!
## Skewness           3.204e+00 0.073436    Assumptions acceptable.
## Kurtosis           1.021e+01 0.001396 Assumptions NOT satisfied!
## Link Function      5.751e-16 1.000000    Assumptions acceptable.
## Heteroscedasticity 2.713e+00 0.099532    Assumptions acceptable.

Removing outliers did improve the model, but all assumptions are still not satisfied. Mainly, the distribution of the residuals remained leptokurtic.If we accept the model, it can be interpreted as “Being female predicted having a smaller difference of cards sorted in reward vs. non-reward condition relative to being male (Standardized Beta = -0.067, p = .37, R^2 = .005)”

Assignment1

MM

9/6/2021

Organize Data

Unstandardized Regression

Standardized Regression

Remove Outliers; Standardized Regression