# Document-wide knitr chunk default: echo = TRUE prints each chunk's R source
# alongside its output in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(prettyunits)
# Read the PLR CSV file into PLRdf. The path is absolute and tied to one
# user's Downloads folder, so it must be edited to run on another machine.
PLRdf <- readr::read_csv(
  file = "/Users/tatum.connell/Downloads/PLR_09082(1).csv"
)
## Rows: 3250 Columns: 509
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): URSI, Enroll_Season, Commercial_Use, Participant_Status, var5, va...
## dbl (497): Enroll_Year, var1, var2, Study_Site, Release_Number, var3, var4, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean and standard deviation of Child_Age, with missing values removed.
# `TRUE` is written out because `T` is an ordinary binding that can be
# reassigned, whereas `TRUE` is a reserved word.
mean(PLRdf$Child_Age, na.rm = TRUE)
## [1] 10.40901
sd(PLRdf$Child_Age, na.rm = TRUE)
## [1] 3.586195
# Store the mean under a name so it can be referenced later.
mymean <- mean(PLRdf$Child_Age, na.rm = TRUE)
The mean child age is 10.4090149 (unrounded). Rounded to two decimal places, the mean child age is 10.41.
# For each Enroll_Year, report the mean and standard deviation of Child_Age,
# P1_Age, and P2_Age, dropping missing values from every statistic.
PLRdf %>%
  group_by(Enroll_Year) %>%
  summarise(
    mean_childage = mean(Child_Age, na.rm = TRUE),
    sd_childage = sd(Child_Age, na.rm = TRUE),
    mean_p1_age = mean(P1_Age, na.rm = TRUE),
    sd_p1_age = sd(P1_Age, na.rm = TRUE),
    mean_p2_age = mean(P2_Age, na.rm = TRUE),
    # sd(), not mean(): this column reports spread like the other sd_* columns.
    sd_p2_age = sd(P2_Age, na.rm = TRUE)
  )
# Child_Sex: counts per code, followed by each code's share of the total.
table(PLRdf[["Child_Sex"]])
##
## 0 1
## 2105 1145
proportions(table(PLRdf[["Child_Sex"]]))
##
## 0 1
## 0.6476923 0.3523077
# P1_Sex: counts per code, followed by each code's share of the total.
table(PLRdf[["P1_Sex"]])
##
## 0 1
## 514 2693
proportions(table(PLRdf[["P1_Sex"]]))
##
## 0 1
## 0.1602744 0.8397256
# P2_Sex: counts per code, followed by each code's share of the total.
table(PLRdf[["P2_Sex"]])
##
## 0 1
## 2161 539
proportions(table(PLRdf[["P2_Sex"]]))
##
## 0 1
## 0.8003704 0.1996296
# Bar chart whose bar heights are the mean CBCL_Total within each Enroll_Year.
ggplot(data = PLRdf, mapping = aes(x = Enroll_Year, y = CBCL_Total)) +
  stat_summary(geom = "bar", fun = "mean")
## Warning: Removed 157 rows containing non-finite values (`stat_summary()`).
# Bar chart of the number of rows for each Child_Sex code.
ggplot(data = PLRdf, mapping = aes(x = Child_Sex)) +
  geom_bar(fill = "#635a8f", stat = "count")
## T-Test
### CBCL Internalizing Score
# Welch two-sample t-test comparing CBCL_Int between the two Child_Sex groups.
# `paired` is omitted: the test is unpaired by default, and the formula
# interface rejects a `paired` argument in recent R releases.
# NOTE(review): believed to apply from R 4.4.0 onward; confirm for target R.
t.test(PLRdf$CBCL_Int ~ as.factor(PLRdf$Child_Sex))
##
## Welch Two Sample t-test
##
## data: PLRdf$CBCL_Int by as.factor(PLRdf$Child_Sex)
## t = -1.3642, df = 2217.5, p-value = 0.1727
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -1.0394550 0.1865837
## sample estimates:
## mean in group 0 mean in group 1
## 9.912438 10.338873
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.