This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

library(readxl)
library(ggplot2)
df <- read_excel("~/Documents/family_data.xlsx", sheet = 1)
names(df)

df <- df %>%
  mutate(
    family_history = if_else(Number_Affected >= 2, "High", "Low"),
    family_history = factor(family_history, levels = c("Low", "High"))
  )

ggplot(df, aes(x = Number_Affected, y = Penetrance_score, color = Total_Family)) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = TRUE) +
  scale_color_viridis_c(name = "Total Family\nSize") +
  labs(
    x     = "Number of Affected Relatives",
    y     = "C9 Penetrance Score",
    title = "Penetrance vs. Family Burden\n(colored by Total Family Size)"
  ) +
  theme_minimal(base_size = 14)

lm1 <- lm(Penetrance_score ~ Number_Affected, data = df)
summary(lm1)

lm2 <- lm(Penetrance_score ~ Number_Affected + Total_Family, data = df)
summary(lm2)

ggplot(df, aes(x = family_history, y = Penetrance_score, fill = family_history)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  labs(
    x = "Family History Group",
    y = "C9 Penetrance Score",
    title = "Low vs. High Family History"
  ) +
  theme_classic(base_size = 13) +
  scale_fill_viridis_d()
library(readxl)
library(dplyr)
G3;
Attaching package: ‘dplyr’

gG3;The following objects are masked from ‘package:stats’:

    filter, lag

gG3;The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

g
library(ggplot2)
library(viridis)  # for the color scale
G3;Loading required package: viridisLite
g
# 1. Read in your data
df <- read_excel("~/Documents/family_data.xlsx", sheet = 1)

# 2. (Re)create the Low/High factor if you need it later
df <- df %>%
  mutate(
    family_history = if_else(Number_Affected >= 2, "High", "Low"),
    family_history = factor(family_history, levels = c("Low","High"))
  )

# 3. Continuous scatter + regression, coloring by Total_family
ggplot(df, aes(
    x     = Number_Affected,
    y     = Penetrance_score,
    color = Total_family
  )) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = TRUE) +
  scale_color_viridis_c(name = "Total Family\nSize") +
  labs(
    x     = "Number of Affected Relatives",
    y     = "C9 Penetrance Score",
    title = "Family Burden vs. C9 Penetrance"
  ) +
  theme_minimal(base_size = 14)


# 4. Linear model: penetrance ~ Number_Affected
lm1 <- lm(Penetrance_score ~ Number_Affected, data = df)
summary(lm1)

Call:
lm(formula = Penetrance_score ~ Number_Affected, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.27106 -0.08102  0.01894  0.05402  0.34894 

Coefficients:
                Estimate Std. Error t value
(Intercept)      0.34091    0.06460   5.277
Number_Affected  0.07008    0.02874   2.438
                Pr(>|t|)    
(Intercept)     6.16e-05 ***
Number_Affected    0.026 *  
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1515 on 17 degrees of freedom
Multiple R-squared:  0.2591,    Adjusted R-squared:  0.2155 
F-statistic: 5.946 on 1 and 17 DF,  p-value: 0.02602
# 5. Adjusted model with Total_family
lm2 <- lm(Penetrance_score ~ Number_Affected + Total_family, data = df)
summary(lm2)

Call:
lm(formula = Penetrance_score ~ Number_Affected + Total_family, 
    data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.27186 -0.07715  0.02024  0.06274  0.32648 

Coefficients:
                 Estimate Std. Error t value
(Intercept)     0.3328102  0.0757433   4.394
Number_Affected 0.0654414  0.0361390   1.811
Total_family    0.0006987  0.0031309   0.223
                Pr(>|t|)    
(Intercept)     0.000453 ***
Number_Affected 0.088985 .  
Total_family    0.826225    
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1559 on 16 degrees of freedom
Multiple R-squared:  0.2614,    Adjusted R-squared:  0.1691 
F-statistic: 2.832 on 2 and 16 DF,  p-value: 0.08855
summary(lm1)

Call:
lm(formula = Penetrance_score ~ Number_Affected, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.27106 -0.08102  0.01894  0.05402  0.34894 

Coefficients:
                Estimate Std. Error t value
(Intercept)      0.34091    0.06460   5.277
Number_Affected  0.07008    0.02874   2.438
                Pr(>|t|)    
(Intercept)     6.16e-05 ***
Number_Affected    0.026 *  
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1515 on 17 degrees of freedom
Multiple R-squared:  0.2591,    Adjusted R-squared:  0.2155 
F-statistic: 5.946 on 1 and 17 DF,  p-value: 0.02602
t_res <- t.test(Penetrance_score ~ family_history, data = df)
print(t_res)

    Welch Two Sample t-test

data:  Penetrance_score by family_history
t = -2.371, df = 16.29, p-value = 0.03039
alternative hypothesis: true difference in means between group Low and group High is not equal to 0
95 percent confidence interval:
 -0.29811478 -0.01688522
sample estimates:
 mean in group Low mean in group High 
            0.3825             0.5400 
df %>%
  group_by(family_history) %>%
  summarise(
    mean = mean(Penetrance_score),
    sd   = sd(Penetrance_score)
  ) %>%
  ggplot(aes(x = family_history, y = mean, fill = family_history)) +
  geom_col(width = 0.5) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2) +
  labs(
    x = "Family History",
    y = "Mean Penetrance ± SD",
    title = "Group Means with Standard Deviations"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

library(dplyr)

df <- df %>%
  mutate(
    K_category = if_else(Penetrance_score >= 0.5, "HighK", "LowK"),
    K_category = factor(K_category, levels = c("LowK","HighK"))
  )


library(tidyr)

heat_data <- df %>%
  group_by(ClinicalSyndrome, NeuropathSubtype) %>%
  summarise(
    total   = n(),
    highK   = sum(K_category == "HighK"),
    propHigh = highK / total,
    .groups = "drop"
  )
G1;Error in `group_by()`:
! Must group by variables found in `.data`.
Column `ClinicalSyndrome` is not found.
Column `NeuropathSubtype` is not found.
Run `]8;;x-r-run:rlang::last_trace()rlang::last_trace()]8;;` to see where the error occurred.
g
LS0tCnRpdGxlOiAiQzkgcGVuZXRyYW5jZSB2cy4gRmFtaWx5IEhpc3RvcnkgQnVyZGVuIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiAKClRyeSBleGVjdXRpbmcgdGhpcyBjaHVuayBieSBjbGlja2luZyB0aGUgKlJ1biogYnV0dG9uIHdpdGhpbiB0aGUgY2h1bmsgb3IgYnkgcGxhY2luZyB5b3VyIGN1cnNvciBpbnNpZGUgaXQgYW5kIHByZXNzaW5nICpDbWQrU2hpZnQrRW50ZXIqLiAKCmBgYHtyfQpsaWJyYXJ5KHJlYWR4bCkKbGlicmFyeShnZ3Bsb3QyKQpkZiA8LSByZWFkX2V4Y2VsKCJ+L0RvY3VtZW50cy9mYW1pbHlfZGF0YS54bHN4Iiwgc2hlZXQgPSAxKQpuYW1lcyhkZikKCmRmIDwtIGRmICU+JQogIG11dGF0ZSgKICAgIGZhbWlseV9oaXN0b3J5ID0gaWZfZWxzZShOdW1iZXJfQWZmZWN0ZWQgPj0gMiwgIkhpZ2giLCAiTG93IiksCiAgICBmYW1pbHlfaGlzdG9yeSA9IGZhY3RvcihmYW1pbHlfaGlzdG9yeSwgbGV2ZWxzID0gYygiTG93IiwgIkhpZ2giKSkKICApCgpnZ3Bsb3QoZGYsIGFlcyh4ID0gTnVtYmVyX0FmZmVjdGVkLCB5ID0gUGVuZXRyYW5jZV9zY29yZSwgY29sb3IgPSBUb3RhbF9GYW1pbHkpKSArCiAgZ2VvbV9wb2ludChzaXplID0gMywgYWxwaGEgPSAwLjgpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IFRSVUUpICsKICBzY2FsZV9jb2xvcl92aXJpZGlzX2MobmFtZSA9ICJUb3RhbCBGYW1pbHlcblNpemUiKSArCiAgbGFicygKICAgIHggICAgID0gIk51bWJlciBvZiBBZmZlY3RlZCBSZWxhdGl2ZXMiLAogICAgeSAgICAgPSAiQzkgUGVuZXRyYW5jZSBTY29yZSIsCiAgICB0aXRsZSA9ICJQZW5ldHJhbmNlIHZzLiBGYW1pbHkgQnVyZGVuXG4oY29sb3JlZCBieSBUb3RhbCBGYW1pbHkgU2l6ZSkiCiAgKSArCiAgdGhlbWVfbWluaW1hbChiYXNlX3NpemUgPSAxNCkKCmxtMSA8LSBsbShQZW5ldHJhbmNlX3Njb3JlIH4gTnVtYmVyX0FmZmVjdGVkLCBkYXRhID0gZGYpCnN1bW1hcnkobG0xKQoKbG0yIDwtIGxtKFBlbmV0cmFuY2Vfc2NvcmUgfiBOdW1iZXJfQWZmZWN0ZWQgKyBUb3RhbF9GYW1pbHksIGRhdGEgPSBkZikKc3VtbWFyeShsbTIpCgpnZ3Bsb3QoZGYsIGFlcyh4ID0gZmFtaWx5X2hpc3RvcnksIHkgPSBQZW5ldHJhbmNlX3Njb3JlLCBmaWxsID0gZmFtaWx5X2hpc3RvcnkpKSArCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuc2hhcGUgPSBOQSkgKwogIGdlb21faml0dGVyKHdpZHRoID0gMC4yLCBhbHBoYSA9IDAuNikgKwogIGxhYnMoCiAgICB4ID0gIkZhbWlseSBIaXN0b3J5IEdyb3VwIiwKICAgIHkgPSAiQzkgUGVuZXRyYW5jZSBTY29yZSIsCiAgICB0aXRsZSA9ICJMb3cgdnMuIEhpZ2ggRmFtaWx5IEhpc3RvcnkiCiAgKSArCiAgdGhlbWVfY2xhc3NpYyhiYXNlX3NpemUgPSAxMykgKwogIHNjYWxlX2ZpbGxfdmlyaWRpc19kKCkKCmBgYAoKCgpgYGB7cn0KbGlicmFyeShyZWFkeGwpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh2aXJpZGlzKSAgIyBmb3IgdGhlIGNvbG9yIHNjYWxlCgojIDEuIFJlYWQgaW4geW91ciBkYXRhCmRmIDwtIHJlYWRfZXhjZWwoIn4vRG9jdW1lbnRzL2ZhbWlseV9kYXRhLnhsc3giLCBzaGVldCA9IDEpCgojIDIuIChSZSljcmVhdGUgdGhlIExvdy9IaWdoIGZhY3RvciBpZiB5b3UgbmVlZCBpdCBsYXRlcgpkZiA8LSBkZiAlPiUKICBtdXRhdGUoCiAgICBmYW1pbHlfaGlzdG9yeSA9IGlmX2Vsc2UoTnVtYmVyX0FmZmVjdGVkID49IDIsICJIaWdoIiwgIkxvdyIpLAogICAgZmFtaWx5X2hpc3RvcnkgPSBmYWN0b3IoZmFtaWx5X2hpc3RvcnksIGxldmVscyA9IGMoIkxvdyIsIkhpZ2giKSkKICApCgojIDMuIENvbnRpbnVvdXMgc2NhdHRlciArIHJlZ3Jlc3Npb24sIGNvbG9yaW5nIGJ5IFRvdGFsX2ZhbWlseQpnZ3Bsb3QoZGYsIGFlcygKICAgIHggICAgID0gTnVtYmVyX0FmZmVjdGVkLAogICAgeSAgICAgPSBQZW5ldHJhbmNlX3Njb3JlLAogICAgY29sb3IgPSBUb3RhbF9mYW1pbHkKICApKSArCiAgZ2VvbV9wb2ludChzaXplID0gMywgYWxwaGEgPSAwLjgpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IFRSVUUpICsKICBzY2FsZV9jb2xvcl92aXJpZGlzX2MobmFtZSA9ICJUb3RhbCBGYW1pbHlcblNpemUiKSArCiAgbGFicygKICAgIHggICAgID0gIk51bWJlciBvZiBBZmZlY3RlZCBSZWxhdGl2ZXMiLAogICAgeSAgICAgPSAiQzkgUGVuZXRyYW5jZSBTY29yZSIsCiAgICB0aXRsZSA9ICJGYW1pbHkgQnVyZGVuIHZzLiBDOSBQZW5ldHJhbmNlIgogICkgKwogIHRoZW1lX21pbmltYWwoYmFzZV9zaXplID0gMTQpCgojIDQuIExpbmVhciBtb2RlbDogcGVuZXRyYW5jZSB+IE51bWJlcl9BZmZlY3RlZApsbTEgPC0gbG0oUGVuZXRyYW5jZV9zY29yZSB+IE51bWJlcl9BZmZlY3RlZCwgZGF0YSA9IGRmKQpzdW1tYXJ5KGxtMSkKCiMgNS4gQWRqdXN0ZWQgbW9kZWwgd2l0aCBUb3RhbF9mYW1pbHkKbG0yIDwtIGxtKFBlbmV0cmFuY2Vfc2NvcmUgfiBOdW1iZXJfQWZmZWN0ZWQgKyBUb3RhbF9mYW1pbHksIGRhdGEgPSBkZikKc3VtbWFyeShsbTIpCgpgYGAKYGBge3J9CnN1bW1hcnkobG0xKQoKYGBgCgoKYGBge3J9CgpkZiA8LSBkZiAlPiUKICBtdXRhdGUoCiAgICBmYW1pbHlfaGlzdG9yeSA9IGlmX2Vsc2UoTnVtYmVyX0FmZmVjdGVkID49IDIsICJIaWdoIiwgIkxvdyIpLAogICAgZmFtaWx5X2hpc3RvcnkgPSBmYWN0b3IoZmFtaWx5X2hpc3RvcnksIGxldmVscyA9IGMoIkxvdyIsIkhpZ2giKSkKICApCgpwX2JveCA8LSBnZ3Bsb3QoZGYsIGFlcyh4ID0gZmFtaWx5X2hpc3RvcnksIHkgPSBQZW5ldHJhbmNlX3Njb3JlLCBmaWxsID0gZmFtaWx5X2hpc3RvcnkpKSArCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuc2hhcGUgPSBOQSwgd2lkdGggPSAwLjUpICsKICBnZW9tX2ppdHRlcih3aWR0aCA9IDAuMTUsIHNpemUgPSAyLCBhbHBoYSA9IDAuNykgKwogIGxhYnMoCiAgICB4ICAgICA9ICJGYW1pbHkgSGlzdG9yeSBHcm91cCIsCiAgICB5ICAgICA9ICJDOSBQZW5ldHJhbmNlIFNjb3JlIiwKICAgIHRpdGxlID0gIkxvdyB2cy4gSGlnaCBGYW1pbHkgSGlzdG9yeSIKICApICsKICB0aGVtZV9jbGFzc2ljKGJhc2Vfc2l6ZSA9IDE0KSArCiAgc2NhbGVfZmlsbF9tYW51YWwodmFsdWVzID0gYygiIzFiOWU3NyIsICIjZDk1ZjAyIikpICsKICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpCgpwcmludChwX2JveCkKCnRfcmVzIDwtIHQudGVzdChQZW5ldHJhbmNlX3Njb3JlIH4gZmFtaWx5X2hpc3RvcnksIGRhdGEgPSBkZikKcHJpbnQodF9yZXMpCgpsaWJyYXJ5KGRwbHlyKQoKZGYgJT4lCiAgZ3JvdXBfYnkoZmFtaWx5X2hpc3RvcnkpICU+JQogIHN1bW1hcmlzZSgKICAgIG4gICAgICAgID0gbigpLAogICAgbWVhbiAgICAgPSBtZWFuKFBlbmV0cmFuY2Vfc2NvcmUpLAogICAgbWVkaWFuICAgPSBtZWRpYW4oUGVuZXRyYW5jZV9zY29yZSksCiAgICBpcXIgICAgICA9IElRUihQZW5ldHJhbmNlX3Njb3JlKSwKICAgIC5ncm91cHMgID0gImRyb3AiCiAgKQoKdF9yZXMgPC0gdC50ZXN0KFBlbmV0cmFuY2Vfc2NvcmUgfiBmYW1pbHlfaGlzdG9yeSwgZGF0YSA9IGRmKQpwcmludCh0X3JlcykKCgpgYGAKCgpgYGB7cn0KZGYgJT4lCiAgZ3JvdXBfYnkoZmFtaWx5X2hpc3RvcnkpICU+JQogIHN1bW1hcmlzZSgKICAgIG1lYW4gPSBtZWFuKFBlbmV0cmFuY2Vfc2NvcmUpLAogICAgc2QgICA9IHNkKFBlbmV0cmFuY2Vfc2NvcmUpCiAgKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSBmYW1pbHlfaGlzdG9yeSwgeSA9IG1lYW4sIGZpbGwgPSBmYW1pbHlfaGlzdG9yeSkpICsKICBnZW9tX2NvbCh3aWR0aCA9IDAuNSkgKwogIGdlb21fZXJyb3JiYXIoYWVzKHltaW4gPSBtZWFuIC0gc2QsIHltYXggPSBtZWFuICsgc2QpLCB3aWR0aCA9IDAuMikgKwogIGxhYnMoCiAgICB4ID0gIkZhbWlseSBIaXN0b3J5IiwKICAgIHkgPSAiTWVhbiBQZW5ldHJhbmNlIMKxIFNEIiwKICAgIHRpdGxlID0gIkdyb3VwIE1lYW5zIHdpdGggU3RhbmRhcmQgRGV2aWF0aW9ucyIKICApICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJub25lIikKYGBgCgoKYGBge3J9CmxpYnJhcnkoZHBseXIpCgpkZiA8LSBkZiAlPiUKICBtdXRhdGUoCiAgICBLX2NhdGVnb3J5ID0gaWZfZWxzZShQZW5ldHJhbmNlX3Njb3JlID49IDAuNSwgIkhpZ2hLIiwgIkxvd0siKSwKICAgIEtfY2F0ZWdvcnkgPSBmYWN0b3IoS19jYXRlZ29yeSwgbGV2ZWxzID0gYygiTG93SyIsIkhpZ2hLIikpCiAgKQoKCmxpYnJhcnkodGlkeXIpCgpoZWF0X2RhdGEgPC0gZGYgJT4lCiAgZ3JvdXBfYnkoQ2xpbmljYWxTeW5kcm9tZSwgTmV1cm9wYXRoU3VidHlwZSkgJT4lCiAgc3VtbWFyaXNlKAogICAgdG90YWwgICA9IG4oKSwKICAgIGhpZ2hLICAgPSBzdW0oS19jYXRlZ29yeSA9PSAiSGlnaEsiKSwKICAgIHByb3BIaWdoID0gaGlnaEsgLyB0b3RhbCwKICAgIC5ncm91cHMgPSAiZHJvcCIKICApCgpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoc2NhbGVzKSAgIyBmb3IgcGVyY2VudF9mb3JtYXQoKQoKZ2dwbG90KGhlYXRfZGF0YSwgYWVzKAogICAgeCAgICA9IE5ldXJvcGF0aFN1YnR5cGUsCiAgICB5ICAgID0gQ2xpbmljYWxTeW5kcm9tZSwKICAgIGZpbGwgPSBwcm9wSGlnaAogICkpICsKICBnZW9tX3RpbGUoY29sb3IgPSAid2hpdGUiKSArCiAgc2NhbGVfZmlsbF9ncmFkaWVudCgKICAgIGxvdyAgPSAiZ3JleTkwIiwKICAgIGhpZ2ggPSAic3RlZWxibHVlIiwKICAgIGxhYmVscyA9IHBlcmNlbnRfZm9ybWF0KGFjY3VyYWN5ID0gMSksCiAgICBuYW1lICAgPSAiUHJvcC4gSGlnaOKAkUsiCiAgKSArCiAgbGFicygKICAgIHggICAgID0gIk5ldXJvcGF0aG9sb2dpY2FsIFN1YnR5cGUiLAogICAgeSAgICAgPSAiQ2xpbmljYWwgU3luZHJvbWUiLAogICAgdGl0bGUgPSAiUHJvcG9ydGlvbiBvZiBIaWdoIEvigJFTY29yZSBDYXJyaWVyc1xuYnkgQ2xpbmljYWwgJiBOZXVyb3BhdGggU3VidHlwZSIKICApICsKICB0aGVtZV9taW5pbWFsKGJhc2Vfc2l6ZSA9IDE0KSArCiAgdGhlbWUoCiAgICBheGlzLnRleHQueCAgICAgPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSwKICAgIHBhbmVsLmdyaWQgICAgICA9IGVsZW1lbnRfYmxhbmsoKQogICkKCgoKYGBgCgo=