# Global knitr chunk option: echo the R source of every chunk in the output.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(prettyunits)
# Read the PLR CSV export into a tibble.
# NOTE(review): this is an absolute path into one user's Downloads folder, so
# the document will only knit on that machine — confirm the intended location.
PLRdf <- readr::read_csv(
  file = "/Users/tatum.connell/Downloads/PLR_09082(1).csv"
)
## Rows: 3250 Columns: 509
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (12): URSI, Enroll_Season, Commercial_Use, Participant_Status, var5, va...
## dbl (497): Enroll_Year, var1, var2, Study_Site, Release_Number, var3, var4, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

PLR Demographics

# Overall mean and standard deviation of child age, ignoring missing values.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
mean(PLRdf$Child_Age, na.rm = TRUE)
## [1] 10.40901
sd(PLRdf$Child_Age, na.rm = TRUE)
## [1] 3.586195
# Store the mean so it can be reused later in the document.
mymean <- mean(PLRdf$Child_Age, na.rm = TRUE)

The mean child age is 10.4090149 (10.41 when rounded to two decimal places).

Child & Parent Age

# Mean and standard deviation of child, parent 1 and parent 2 age for each
# enrollment year. Missing ages are dropped before each statistic is computed.
PLRdf %>%
  group_by(Enroll_Year) %>%
  summarise(
    mean_childage = mean(Child_Age, na.rm = TRUE),
    sd_childage = sd(Child_Age, na.rm = TRUE),
    mean_p1_age = mean(P1_Age, na.rm = TRUE),
    sd_p1_age = sd(P1_Age, na.rm = TRUE),
    mean_p2_age = mean(P2_Age, na.rm = TRUE),
    # Fixed: this column previously called mean(), so it duplicated
    # mean_p2_age instead of reporting the standard deviation.
    sd_p2_age = sd(P2_Age, na.rm = TRUE)
  )

Child & Parent Sex

# Counts and proportions of each sex code for the child, parent 1 and parent 2.
# table() drops NA by default, so each prop.table() below is a share of the
# non-missing values only (the P1_Sex and P2_Sex counts total fewer than the
# 3250 rows read in).
# NOTE(review): the meaning of codes 0 and 1 is not shown here — confirm
# against the data dictionary.

# Child sex: counts, then proportions.
table(PLRdf$Child_Sex)
## 
##    0    1 
## 2105 1145
prop.table(table(PLRdf$Child_Sex))
## 
##         0         1 
## 0.6476923 0.3523077
# Parent 1 sex: counts, then proportions.
table(PLRdf$P1_Sex)
## 
##    0    1 
##  514 2693
prop.table(table(PLRdf$P1_Sex))
## 
##         0         1 
## 0.1602744 0.8397256
# Parent 2 sex: counts, then proportions.
table(PLRdf$P2_Sex)
## 
##    0    1 
## 2161  539
prop.table(table(PLRdf$P2_Sex))
## 
##         0         1 
## 0.8003704 0.1996296

CBCL Total Score

# Bar chart of the mean CBCL_Total for each Enroll_Year value.
ggplot(data = PLRdf, mapping = aes(x = Enroll_Year, y = CBCL_Total)) +
  stat_summary(geom = "bar", fun = "mean")
## Warning: Removed 157 rows containing non-finite values (`stat_summary()`).

Child Sex

# Bar chart of the number of rows at each Child_Sex value.
ggplot(data = PLRdf, mapping = aes(x = Child_Sex)) +
  geom_bar(fill = "#635a8f", stat = "count")

## T-Test
### CBCL Internalizing Score

# Welch two-sample t-test of CBCL_Int between the two Child_Sex groups.
# `paired` is not passed: the formula interface always runs an unpaired test,
# and newer R releases (4.4.0+) raise an error when `paired` is supplied to it.
t.test(PLRdf$CBCL_Int ~ as.factor(PLRdf$Child_Sex))
## 
##  Welch Two Sample t-test
## 
## data:  PLRdf$CBCL_Int by as.factor(PLRdf$Child_Sex)
## t = -1.3642, df = 2217.5, p-value = 0.1727
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -1.0394550  0.1865837
## sample estimates:
## mean in group 0 mean in group 1 
##        9.912438       10.338873

Note that the code chunks in this document are printed because `echo = TRUE` is set globally; adding `echo = FALSE` to a chunk prevents printing of the R code that generated its plot.