# Path to the downloaded study archive (update if necessary)
file_path <- "lgggbm_tcga_pub.tar.gz"
# Extract the archive into the current working directory
untar(file_path, exdir = ".")
# List the extracted files. The archive unpacks into "lgggbm_tcga_pub/" (the
# clinical table is read from that directory below), so inspect that directory
list.files("lgggbm_tcga_pub")
## character(0)
# Location of the patient-level clinical table inside the extracted archive
# (adjust if the archive was unpacked somewhere else)
file_path <- file.path("lgggbm_tcga_pub", "data_clinical_patient.txt")
# Import the tab-separated table as the data frame 'pts': the first
# non-comment line supplies the column names, text after a "#" is ignored,
# and strings are kept as character vectors
pts <- read.table(
  file = file_path,
  sep = "\t",
  header = TRUE,
  comment.char = "#",
  stringsAsFactors = FALSE
)
# Preview the first rows to confirm the import worked
head(pts)
## PATIENT_ID STUDY BCR_STATUS HISTOLOGICAL_DIAGNOSIS AGE
## 1 TCGA-CS-4938 Brain Lower Grade Glioma IGC astrocytoma 31
## 2 TCGA-CS-4941 Brain Lower Grade Glioma IGC astrocytoma 67
## 3 TCGA-CS-4942 Brain Lower Grade Glioma IGC astrocytoma 44
## 4 TCGA-CS-4943 Brain Lower Grade Glioma IGC astrocytoma 37
## 5 TCGA-CS-4944 Brain Lower Grade Glioma IGC astrocytoma 50
## 6 TCGA-CS-5390 Brain Lower Grade Glioma IGC oligodendroglioma 47
## SEX OS_MONTHS OS_STATUS KARNOFSKY_PERFORMANCE_SCORE
## 1 Female 4.7 0:LIVING 90
## 2 Male 7.7 1:DECEASED 90
## 3 Female 43.9 1:DECEASED 90
## 4 Male 18.1 0:LIVING 50
## 5 Male 10.6 0:LIVING 90
## 6 Female 64.6 0:LIVING 100
# Install required packages if you don't have them yet
# install.packages(c("dplyr", "kableExtra", "gt"))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(gt)
# Print a per-column profile of 'pts' (type, values/statistics, frequencies,
# valid and missing counts). summarytools is called through '::' instead of
# being attached; install it once if needed:
# install.packages("summarytools")
summarytools::dfSummary(pts)
## Data Frame Summary
## pts
## Dimensions: 1122 x 9
## Duplicates: 0
##
## ---------------------------------------------------------------------------------------------------------------------------------
## No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
## ---- ----------------------------- ----------------------------- --------------------- --------------------- ---------- ---------
## 1 PATIENT_ID 1. TCGA-02-0001 1 ( 0.1%) 1122 0
## [character] 2. TCGA-02-0003 1 ( 0.1%) (100.0%) (0.0%)
## 3. TCGA-02-0004 1 ( 0.1%)
## 4. TCGA-02-0006 1 ( 0.1%)
## 5. TCGA-02-0007 1 ( 0.1%)
## 6. TCGA-02-0009 1 ( 0.1%)
## 7. TCGA-02-0010 1 ( 0.1%)
## 8. TCGA-02-0011 1 ( 0.1%)
## 9. TCGA-02-0014 1 ( 0.1%)
## 10. TCGA-02-0015 1 ( 0.1%)
## [ 1112 others ] 1112 (99.1%) IIIIIIIIIIIIIIIIIII
##
## 2 STUDY 1. Brain Lower Grade Glioma 516 (46.0%) IIIIIIIII 1122 0
## [character] 2. Glioblastoma multiforme 606 (54.0%) IIIIIIIIII (100.0%) (0.0%)
##
## 3 BCR_STATUS 1. IGC 951 (84.8%) IIIIIIIIIIIIIIII 1122 0
## [character] 2. NCH 171 (15.2%) III (100.0%) (0.0%)
##
## 4 HISTOLOGICAL_DIAGNOSIS 1. (Empty string) 75 ( 6.7%) I 1122 0
## [character] 2. astrocytoma 169 (15.1%) III (100.0%) (0.0%)
## 3. glioblastoma 590 (52.6%) IIIIIIIIII
## 4. oligoastrocytoma 114 (10.2%) II
## 5. oligodendroglioma 174 (15.5%) III
##
## 5 AGE Mean (sd) : 51.4 (15.8) 75 distinct values : : 1047 75
## [numeric] min < med < max: : . : : (93.3%) (6.7%)
## 10 < 52 < 89 : : : : : :
## IQR (CV) : 24 (0.3) : : : : : : :
## : : : : : : : : .
##
## 6 SEX 1. (Empty string) 75 ( 6.7%) I 1122 0
## [character] 2. Female 429 (38.2%) IIIIIII (100.0%) (0.0%)
## 3. Male 618 (55.1%) IIIIIIIIIII
##
## 7 OS_MONTHS Mean (sd) : 19.1 (24.2) 417 distinct values : 1047 75
## [numeric] min < med < max: : (93.3%) (6.7%)
## 0 < 11.7 < 211 :
## IQR (CV) : 15.7 (1.3) :
## : : .
##
## 8 OS_STATUS 1. (Empty string) 76 ( 6.8%) I 1122 0
## [character] 2. 0:LIVING 522 (46.5%) IIIIIIIII (100.0%) (0.0%)
## 3. 1:DECEASED 524 (46.7%) IIIIIIIII
##
## 9 KARNOFSKY_PERFORMANCE_SCORE Mean (sd) : 81.3 (14.5) 20 : 2 ( 0.3%) 697 425
## [numeric] min < med < max: 40 : 18 ( 2.6%) (62.1%) (37.9%)
## 20 < 80 < 100 50 : 4 ( 0.6%)
## IQR (CV) : 10 (0.2) 60 : 95 (13.6%) II
## 70 : 26 ( 3.7%)
## 80 : 289 (41.5%) IIIIIIII
## 90 : 123 (17.6%) III
## 100 : 140 (20.1%) IIII
## ---------------------------------------------------------------------------------------------------------------------------------
# Install the gtsummary package if you don't have it installed yet
# install.packages("gtsummary")
library(gtsummary)
library(dplyr)
# Build a descriptive table of every column except the patient identifier,
# with one statistics column per HISTOLOGICAL_DIAGNOSIS value
table_summary <- tbl_summary(
  select(pts, -PATIENT_ID),
  by = "HISTOLOGICAL_DIAGNOSIS"
)
# Auto-print the resulting table
table_summary
| Characteristic | N = 75¹ | astrocytoma N = 169¹ | glioblastoma N = 590¹ | oligoastrocytoma N = 114¹ | oligodendroglioma N = 174¹ |
|---|---|---|---|---|---|
| STUDY | |||||
| Brain Lower Grade Glioma | 59 (79%) | 169 (100%) | 0 (0%) | 114 (100%) | 174 (100%) |
| Glioblastoma multiforme | 16 (21%) | 0 (0%) | 590 (100%) | 0 (0%) | 0 (0%) |
| BCR_STATUS | |||||
| IGC | 34 (45%) | 113 (67%) | 585 (99%) | 88 (77%) | 131 (75%) |
| NCH | 41 (55%) | 56 (33%) | 5 (0.8%) | 26 (23%) | 43 (25%) |
| AGE | NA (NA, NA) | 39 (33, 51) | 59 (50, 68) | 38 (30, 51) | 46 (34, 54) |
| Unknown | 75 | 0 | 0 | 0 | 0 |
| SEX | |||||
| 75 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | |
| Female | 0 (0%) | 74 (44%) | 228 (39%) | 51 (45%) | 76 (44%) |
| Male | 0 (0%) | 95 (56%) | 362 (61%) | 63 (55%) | 98 (56%) |
| OS_MONTHS | NA (NA, NA) | 12 (6, 22) | 11 (5, 18) | 16 (6, 30) | 16 (5, 34) |
| Unknown | 75 | 0 | 0 | 0 | 0 |
| OS_STATUS | |||||
| 75 (100%) | 0 (0%) | 1 (0.2%) | 0 (0%) | 0 (0%) | |
| 0:LIVING | 0 (0%) | 136 (80%) | 145 (25%) | 96 (84%) | 145 (83%) |
| 1:DECEASED | 0 (0%) | 33 (20%) | 444 (75%) | 18 (16%) | 29 (17%) |
| KARNOFSKY_PERFORMANCE_SCORE | |||||
| 20 | 0 (NA%) | 0 (0%) | 2 (0.5%) | 0 (0%) | 0 (0%) |
| 40 | 0 (NA%) | 1 (1.0%) | 16 (3.7%) | 0 (0%) | 1 (1.1%) |
| 50 | 0 (NA%) | 2 (1.9%) | 0 (0%) | 0 (0%) | 2 (2.2%) |
| 60 | 0 (NA%) | 4 (3.9%) | 85 (20%) | 2 (2.9%) | 4 (4.4%) |
| 70 | 0 (NA%) | 6 (5.8%) | 7 (1.6%) | 3 (4.4%) | 10 (11%) |
| 80 | 0 (NA%) | 20 (19%) | 248 (57%) | 10 (15%) | 11 (12%) |
| 90 | 0 (NA%) | 41 (40%) | 12 (2.8%) | 37 (54%) | 33 (36%) |
| 100 | 0 (NA%) | 29 (28%) | 65 (15%) | 16 (24%) | 30 (33%) |
| Unknown | 75 | 66 | 155 | 46 | 83 |
| 1 n (%); Median (Q1, Q3) | |||||
# Compare patient characteristics across histological diagnoses.
# Blank strings in the character columns mean "not recorded": recode them to
# NA so they are reported as "Unknown" instead of forming an unlabeled
# category (e.g. the single blank OS_STATUS value), then drop patients whose
# diagnosis itself is missing.
set.seed(2024) # the simulated p-values below depend on the RNG state
pts %>%
  mutate(across(where(is.character), function(x) na_if(x, ""))) %>%
  filter(!is.na(HISTOLOGICAL_DIAGNOSIS)) %>%
  select(-PATIENT_ID) %>%
  tbl_summary(by = HISTOLOGICAL_DIAGNOSIS) %>%
  add_n() %>%
  # Fisher's exact test exhausts its workspace on the larger contingency
  # tables (e.g. KARNOFSKY_PERFORMANCE_SCORE by diagnosis) and add_p() then
  # reports no p-value; use the Monte Carlo p-value wherever that test is
  # selected.
  add_p(test.args = all_tests("fisher.test") ~ list(simulate.p.value = TRUE))
## The following errors were returned during `add_p()`:
## ✖ For variable `KARNOFSKY_PERFORMANCE_SCORE` (`HISTOLOGICAL_DIAGNOSIS`) and
## "estimate", "p.value", "conf.low", and "conf.high" statistics:
## FEXACT[f3xact()] error: hash key 6e+09 > INT_MAX, kyy=434, it[i (= nco = 5)]=
## 0. Rather set 'simulate.p.value=TRUE'
## ✖ For variable `OS_STATUS` (`HISTOLOGICAL_DIAGNOSIS`) and "estimate",
## "p.value", "conf.low", and "conf.high" statistics: FEXACT error 7(location).
## LDSTP=18480 is too small for this problem, (pastp=153.626,
## ipn_0:=ipoin[itp=140]=549, stp[ipn_0]=153.701). Increase workspace or
## consider using 'simulate.p.value=TRUE'
| Characteristic | N | astrocytoma N = 169¹ | glioblastoma N = 590¹ | oligoastrocytoma N = 114¹ | oligodendroglioma N = 174¹ | p-value² |
|---|---|---|---|---|---|---|
| STUDY | 1,047 | <0.001 | ||||
| Brain Lower Grade Glioma | 169 (100%) | 0 (0%) | 114 (100%) | 174 (100%) | ||
| Glioblastoma multiforme | 0 (0%) | 590 (100%) | 0 (0%) | 0 (0%) | ||
| BCR_STATUS | 1,047 | <0.001 | ||||
| IGC | 113 (67%) | 585 (99%) | 88 (77%) | 131 (75%) | ||
| NCH | 56 (33%) | 5 (0.8%) | 26 (23%) | 43 (25%) | ||
| AGE | 1,047 | 39 (33, 51) | 59 (50, 68) | 38 (30, 51) | 46 (34, 54) | <0.001 |
| SEX | 1,047 | 0.4 | ||||
| Female | 74 (44%) | 228 (39%) | 51 (45%) | 76 (44%) | ||
| Male | 95 (56%) | 362 (61%) | 63 (55%) | 98 (56%) | ||
| OS_MONTHS | 1,047 | 12 (6, 22) | 11 (5, 18) | 16 (6, 30) | 16 (5, 34) | <0.001 |
| OS_STATUS | 1,047 | |||||
| 0 (0%) | 1 (0.2%) | 0 (0%) | 0 (0%) | |||
| 0:LIVING | 136 (80%) | 145 (25%) | 96 (84%) | 145 (83%) | ||
| 1:DECEASED | 33 (20%) | 444 (75%) | 18 (16%) | 29 (17%) | ||
| KARNOFSKY_PERFORMANCE_SCORE | 697 | |||||
| 20 | 0 (0%) | 2 (0.5%) | 0 (0%) | 0 (0%) | ||
| 40 | 1 (1.0%) | 16 (3.7%) | 0 (0%) | 1 (1.1%) | ||
| 50 | 2 (1.9%) | 0 (0%) | 0 (0%) | 2 (2.2%) | ||
| 60 | 4 (3.9%) | 85 (20%) | 2 (2.9%) | 4 (4.4%) | ||
| 70 | 6 (5.8%) | 7 (1.6%) | 3 (4.4%) | 10 (11%) | ||
| 80 | 20 (19%) | 248 (57%) | 10 (15%) | 11 (12%) | ||
| 90 | 41 (40%) | 12 (2.8%) | 37 (54%) | 33 (36%) | ||
| 100 | 29 (28%) | 65 (15%) | 16 (24%) | 30 (33%) | ||
| Unknown | 66 | 155 | 46 | 83 | ||
| 1 n (%); Median (Q1, Q3) | ||||||
| 2 Pearson’s Chi-squared test; Kruskal-Wallis rank sum test | ||||||
“Differential Genetic and Clinical Profiles of Low-Grade Gliomas and Glioblastoma: Implications for Diagnosis and Treatment”
Tables and Figures to Include: Tables:
Demographic and Clinical Characteristics of Patients:
Table including age, sex, diagnosis, treatment history, and clinical outcomes for both LGG and GBM patients. Frequency of Genetic Mutations:
Table listing key mutations (e.g., IDH1, TP53, ATRX) and their frequencies in LGG versus GBM. Copy-Number Alterations Comparison:
A detailed table showing the frequency of specific CNAs like CDKN2A/B loss or EGFR amplification in LGG and GBM. Survival Data:
Summary table of median survival times, along with the results from Kaplan-Meier analysis and Cox model outputs, stratified by LGG and GBM. Treatment Response Rates:
Table summarizing treatment types and response rates (complete response, partial response, stable disease, progression) for each tumor type. Figures:
Kaplan-Meier Survival Curves:
A figure displaying survival curves for LGG and GBM, highlighting significant differences in overall survival. Bar Graph of Mutational Frequencies:
A graph illustrating the percentage of LGG and GBM cases with mutations in key genes. Heatmap of Gene Expression Profiles:
Display differential gene expression patterns between LGG and GBM, focusing on pathways involved in cell cycle, apoptosis, and tumor progression. CNA Profile Plots:
Stacked bar charts or line plots showing the frequency of major CNAs across chromosome locations for both LGG and GBM. Box Plots of Biomarker Levels:
Compare the expression levels of potential biomarkers (e.g., proteins, RNA) in LGG versus GBM, using box plots to show distribution and outliers. Pie Charts of Treatment Distribution:
Pie charts showing the proportion of different treatment modalities used in LGG and GBM patients. Scatter Plots of Prognostic Biomarkers vs. Survival:
Scatter plots correlating key prognostic biomarkers with patient survival times, differentiated by tumor type. Additional Visual Aids:
Pathway Diagrams: Diagrams illustrating key signaling pathways that are differently regulated in LGG and GBM, potentially based on transcriptomic data. Cluster Analysis: Dendrograms or other clustering figures to show how LGG and GBM group based on genetic or molecular profiles.