Revisiting the Lyme disease data: last time we normalized it to remove the negative values, and the top genes from that data did not even end up in the Lyme disease entries of the database, because the merge removed those top genes. Note also that the normalization flipped the expression dynamics: on the resulting 0 to 1 scale, the lowest original values became the highest normalized values and vice versa.
So, we are going to pull those genes up now from the last run of the Lyme disease normalization.
library(rmarkdown)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
Lyme disease link
# Load the normalized Lyme disease table: a Gene column, the sample columns,
# the per-group means and the fold change columns (see colnames() below).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
data <- read.csv('LymeDiseaseNormalizedFCsMeansAdded.csv', header = TRUE)
paged_table(head(data, 10)) # 19,526 x 95
colnames(data)
## [1] "Gene" "healthyControl_1"
## [3] "healthyControl_2" "healthyControl_3"
## [5] "healthyControl_4" "healthyControl_5"
## [7] "healthyControl_6" "healthyControl_7"
## [9] "healthyControl_8" "healthyControl_9"
## [11] "healthyControl_10" "healthyControl_11"
## [13] "healthyControl_12" "healthyControl_13"
## [15] "healthyControl_14" "healthyControl_15"
## [17] "healthyControl_16" "healthyControl_17"
## [19] "healthyControl_18" "healthyControl_19"
## [21] "healthyControl_20" "healthyControl_21"
## [23] "acuteLymeDisease_1" "acuteLymeDisease_2"
## [25] "acuteLymeDisease_3" "acuteLymeDisease_4"
## [27] "acuteLymeDisease_5" "acuteLymeDisease_6"
## [29] "acuteLymeDisease_7" "acuteLymeDisease_8"
## [31] "acuteLymeDisease_9" "acuteLymeDisease_10"
## [33] "acuteLymeDisease_11" "acuteLymeDisease_12"
## [35] "acuteLymeDisease_13" "acuteLymeDisease_14"
## [37] "acuteLymeDisease_15" "acuteLymeDisease_16"
## [39] "acuteLymeDisease_17" "acuteLymeDisease_18"
## [41] "acuteLymeDisease_19" "acuteLymeDisease_20"
## [43] "acuteLymeDisease_21" "acuteLymeDisease_22"
## [45] "acuteLymeDisease_23" "acuteLymeDisease_24"
## [47] "acuteLymeDisease_25" "acuteLymeDisease_26"
## [49] "acuteLymeDisease_27" "acuteLymeDisease_28"
## [51] "Antibodies_1month_1" "Antibodies_1month_2"
## [53] "Antibodies_1month_3" "Antibodies_1month_4"
## [55] "Antibodies_1month_5" "Antibodies_1month_6"
## [57] "Antibodies_1month_7" "Antibodies_1month_8"
## [59] "Antibodies_1month_9" "Antibodies_1month_10"
## [61] "Antibodies_1month_11" "Antibodies_1month_12"
## [63] "Antibodies_1month_13" "Antibodies_1month_14"
## [65] "Antibodies_1month_15" "Antibodies_1month_16"
## [67] "Antibodies_1month_17" "Antibodies_1month_18"
## [69] "Antibodies_1month_19" "Antibodies_1month_20"
## [71] "Antibodies_1month_21" "Antibodies_1month_22"
## [73] "Antibodies_1month_23" "Antibodies_1month_24"
## [75] "Antibodies_1month_25" "Antibodies_1month_26"
## [77] "Antibodies_1month_27" "Antibodies_6months_1"
## [79] "Antibodies_6months_2" "Antibodies_6months_3"
## [81] "Antibodies_6months_4" "Antibodies_6months_5"
## [83] "Antibodies_6months_6" "Antibodies_6months_7"
## [85] "Antibodies_6months_8" "Antibodies_6months_9"
## [87] "Antibodies_6months_10" "healthy_mean"
## [89] "acute_mean" "month1_mean"
## [91] "month6_mean" "foldchange_acute_healthy"
## [93] "foldchange_1month_healthy" "foldchange_6month_healthy"
## [95] "foldchange_6month_acute"
Now we are going to order by the fold change value for each of acute, 1 month of antibodies, and 6 months of antibodies, taking the top ten stimulated and top ten inhibited genes from each comparison.
# Find the column positions of each sample group by name pattern. Only the
# first 87 column names are searched, so the *_mean and foldchange_* summary
# columns (88-95) can never match.
sampleNames <- colnames(data)[1:87]
acute <- grep('acute', sampleNames)
healthy <- grep('healthy', sampleNames)
month1 <- grep('1month', sampleNames)
month6 <- grep('6month', sampleNames)
acute
## [1] 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
## [26] 48 49 50
There are 28 acute samples.
healthy
## [1] 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
There are 21 healthy samples.
month1
## [1] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## [26] 76 77
There are 27 one month antibodies developed samples.
month6
## [1] 78 79 80 81 82 83 84 85 86 87
There are only 10 six months antibodies developed samples.
# Class label for each of the 86 sample columns, in column order:
# 21 healthy, 28 acute, 27 one-month and 10 six-month samples.
class <- rep(
  c('Healthy', 'acute', '1 month infected', '6 months infected'),
  times = c(21, 28, 27, 10)
)
table(class)
## class
## 1 month infected 6 months infected acute Healthy
## 27 10 28 21
There are 86 samples total.
Now, we are going to order by foldchange values and get the top genes in each class.
# Rank the genes by each fold change (largest first) and keep the ten highest
# and ten lowest rows of every ranking.
# The row positions come from nrow(data) instead of the hard-coded 19517:19526,
# and the fold change columns are picked by name instead of position 92/93/94,
# so the selection stays correct if the input table gains or loses rows/columns.
# NOTE(review): order() puts NA values last, so any NA fold change would land in
# the "lowest ten" - confirm the fold change columns contain no NA.
nTop <- 10
nGenes <- nrow(data)
stopifnot(nGenes >= 2 * nTop)
extremeRows <- c(seq_len(nTop), seq(nGenes - nTop + 1, nGenes))

acuteordered <- data[order(data$foldchange_acute_healthy, decreasing = TRUE), ]
acute20 <- acuteordered[extremeRows, ]
month1Ordered <- data[order(data$foldchange_1month_healthy, decreasing = TRUE), ]
month1_20 <- month1Ordered[extremeRows, ]
month6Ordered <- data[order(data$foldchange_6month_healthy, decreasing = TRUE), ]
month6_20 <- month6Ordered[extremeRows, ]

# Keep Gene + the 86 sample columns, plus the fold change that produced the
# ranking, renamed to a common "foldchange" so the three tables can be stacked.
sampleCols <- colnames(data)[1:87]
acuteDB <- acute20[, c(sampleCols, "foldchange_acute_healthy")]
month1DB <- month1_20[, c(sampleCols, "foldchange_1month_healthy")]
month6DB <- month6_20[, c(sampleCols, "foldchange_6month_healthy")]
colnames(acuteDB)[88] <- "foldchange"
colnames(month1DB)[88] <- "foldchange"
colnames(month6DB)[88] <- "foldchange"

# Record which comparison each gene was selected from.
acuteDB$topGeneSource <- "acute over healthy mean values for foldchange top genes Lyme Disease with 28 acute and 21 healthy samples"
month1DB$topGeneSource <- "One month of antibodies over healthy mean values for comparison foldchange values for top genes in Lyme Disease with 27 1 month antibodies samples and 21 healthy samples"
month6DB$topGeneSource <- "Six months of antibodies over healthy mean values for comparison foldchange values of top genes in Lyme disease with 10 samples of 6 months antibodies and 21 healthy samples. "

topGenes60 <- rbind(acuteDB, month1DB, month6DB)
paged_table(topGenes60)
Let's see if there are any duplicate genes.
# Keep only the first occurrence of each gene across the three top-20 lists.
firstSeen <- !duplicated(topGenes60$Gene)
topGenes60_b <- topGenes60[firstSeen, ]
dim(topGenes60_b)
## [1] 52 89
# Save the full 60-row list (duplicates included). FALSE is spelled out because
# F is an ordinary variable that can be reassigned.
write.csv(topGenes60, 'topGenesLyme60duplicates.csv', row.names = FALSE)
Now for the matrix to test these genes, we will use the topGenes60_b dataset so that no gene appears more than once as a feature in our matrix; a duplicated feature would be perfectly collinear with its copy, which we want to avoid in machine learning.
# Pull the expression rows (Gene + 86 samples) of the unique top genes, then
# transpose so that samples become rows and genes become feature columns.
isTopGene <- data$Gene %in% topGenes60_b$Gene
data52 <- data[isTopGene, 1:87]
data52_mx <- data.frame(t(data52[, -1]))
colnames(data52_mx) <- data52$Gene
data52_mx$class <- as.factor(class)
paged_table(data52_mx)
# Random 80/20 split of the 86 samples: 68 rows for training, 18 held out.
set.seed(456)
nSamples <- 86
inTrain <- sample(seq_len(nSamples), floor(0.8 * nSamples))
training <- data52_mx[inTrain, ]
testing <- data52_mx[-inTrain, ]
table(training$class)
##
## 1 month infected 6 months infected acute Healthy
## 23 9 20 16
table(testing$class)
##
## 1 month infected 6 months infected acute Healthy
## 4 1 8 5
# Fit a 4-class random forest on the gene features.
# - Predictors are every column except `class`, selected by name rather than by
#   the hard-coded positions 1:52, so the label can never be used as a feature
#   if the number of gene columns changes.
# - The former `confusion=T` was removed: randomForest() has no such argument
#   (it was silently absorbed by `...`). The out-of-bag confusion matrix is
#   always available as rf$confusion for classification.
geneCols <- setdiff(colnames(training), "class")
rf <- randomForest(training[geneCols], training$class, mtry = 16, ntree = 5000)
rf$confusion
## 1 month infected 6 months infected acute Healthy class.error
## 1 month infected 12 0 8 3 0.4782609
## 6 months infected 0 6 1 2 0.3333333
## acute 7 2 10 1 0.5000000
## Healthy 1 1 2 12 0.2500000
Let's see how well it predicts on the 20% hold-out validation dataset.
# Score the held-out samples and put each prediction next to the true class.
predicted <- predict(rf, newdata = testing)
results <- data.frame(
  predicted = predicted,
  actual = testing$class
)
results
## predicted actual
## healthyControl_3 Healthy Healthy
## healthyControl_7 Healthy Healthy
## healthyControl_9 Healthy Healthy
## healthyControl_18 Healthy Healthy
## healthyControl_21 Healthy Healthy
## acuteLymeDisease_1 acute acute
## acuteLymeDisease_5 Healthy acute
## acuteLymeDisease_8 acute acute
## acuteLymeDisease_11 acute acute
## acuteLymeDisease_15 1 month infected acute
## acuteLymeDisease_16 1 month infected acute
## acuteLymeDisease_21 acute acute
## acuteLymeDisease_24 Healthy acute
## Antibodies_1month_4 Healthy 1 month infected
## Antibodies_1month_6 acute 1 month infected
## Antibodies_1month_14 1 month infected 1 month infected
## Antibodies_1month_26 Healthy 1 month infected
## Antibodies_6months_1 6 months infected 6 months infected
All healthy and 6 months infected samples were predicted correctly while the acute class only had 4/8 correctly predicted as acute, and the 1 month infected class had only 1/4 predicted correctly.
So if we go back and look at those genes that were duplicated we could possibly find a better set of target genes in these samples for predicting Lyme disease. Lets see.
# Rows whose gene already appeared earlier in the combined top-gene list.
isRepeat <- duplicated(topGenes60$Gene)
duplicates <- topGenes60[isRepeat, ]
duplicates$Gene
## [1] "UHMK1" "BCL2" "GPR183" "CSNK1G3" "CDK5RAP3" "C7orf60" "RBMX"
## [8] "CAMK4"
Let's test these genes. In the previous work on elderly lymphomas (polymorphic and monomorphic diffuse large B-cell lymphoma and classic Hodgkin's lymphoma), we saw that the genes that were duplicated were the better set of genes. Now, we will see if that is true here.
# Build the sample-by-gene matrix for the genes that were selected more than
# once: samples as rows, genes as columns, plus the class label.
duplicatedGenes <- duplicates$Gene
isDouble <- data$Gene %in% duplicatedGenes
doublesDB <- data[isDouble, 1:87]
doublesMX <- data.frame(t(doublesDB[, 2:87]))
colnames(doublesMX) <- doublesDB$Gene
doublesMX$class <- as.factor(class)
paged_table(doublesMX)
# Random 80/20 split of the 86 samples: 68 rows for training, 18 held out.
set.seed(567)
nSamples <- 86
inTrain <- sample(seq_len(nSamples), floor(0.8 * nSamples))
training <- doublesMX[inTrain, ]
testing <- doublesMX[-inTrain, ]
table(training$class)
##
## 1 month infected 6 months infected acute Healthy
## 21 7 20 20
table(testing$class)
##
## 1 month infected 6 months infected acute Healthy
## 6 3 8 1
# Fit a 4-class random forest on the duplicated-gene features.
# - Predictors are every column except `class`, selected by name rather than by
#   the hard-coded positions 1:8.
# - The former `confusion=T` was removed: randomForest() has no such argument
#   (it was silently absorbed by `...`). The out-of-bag confusion matrix is
#   always available as rf8$confusion for classification.
geneCols <- setdiff(colnames(training), "class")
rf8 <- randomForest(training[geneCols], training$class, mtry = 3, ntree = 5000)
rf8$confusion
## 1 month infected 6 months infected acute Healthy class.error
## 1 month infected 10 0 5 6 0.5238095
## 6 months infected 2 2 1 2 0.7142857
## acute 4 2 11 3 0.4500000
## Healthy 2 1 0 17 0.1500000
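These genes are not better overall (40 of 68 out-of-bag samples correct, the same as the 52-gene model), and the 6 months infected class is much worse, but let's see how well they predict.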
# Score the held-out samples with the 8-gene forest and compare to the truth.
predict8 <- predict(rf8, newdata = testing)
results8 <- data.frame(
  predicted = predict8,
  actual = testing$class
)
results8
## predicted actual
## healthyControl_11 Healthy Healthy
## acuteLymeDisease_9 acute acute
## acuteLymeDisease_12 1 month infected acute
## acuteLymeDisease_14 1 month infected acute
## acuteLymeDisease_18 1 month infected acute
## acuteLymeDisease_23 1 month infected acute
## acuteLymeDisease_24 1 month infected acute
## acuteLymeDisease_27 Healthy acute
## acuteLymeDisease_28 Healthy acute
## Antibodies_1month_1 1 month infected 1 month infected
## Antibodies_1month_3 acute 1 month infected
## Antibodies_1month_12 acute 1 month infected
## Antibodies_1month_14 1 month infected 1 month infected
## Antibodies_1month_17 1 month infected 1 month infected
## Antibodies_1month_19 acute 1 month infected
## Antibodies_6months_1 6 months infected 6 months infected
## Antibodies_6months_5 Healthy 6 months infected
## Antibodies_6months_9 6 months infected 6 months infected
The healthy sample was correctly identified but the 6 months infected only 2/3 correct, the acute only 1/8 correctly predicted, and the 1 month infected only had 3/6 correctly predicted.
These genes may not be great target genes, but we used them in a 4 class model instead of a 2 class model. So, they could be useful. But the best set of genes were the 52 top genes and not the 8 duplicated genes.
link to pathologies current database is here
path <- "your path to pathologies data"
# Read directly from `path` with file.path() instead of calling setwd():
# changing the working directory is a global side effect, and with relative
# paths each later setwd() would resolve against the previous one.
pathologies <- read.csv(
  file.path(path, "pathologyDB_CFS_added_May25_2026.csv"),
  header = TRUE
)
paged_table(pathologies) #581X7
table(pathologies$topGenePathology)
##
## Acute Infectious Mononucleosis and Chronic Active Epstein-Barr Virus
## 40
## Chronic Fatigue Syndrome (CFS) or Systemic Exhertion Intolerance Disease (SEID) or Myalgic Encephalomyelitis
## 20
## classic Hodgkin's Lymphoma, and/or EBV, and/or HIV
## 52
## colon cancer cells AKA adenocarcinoma
## 57
## EBV & CHL elderly 50-94 years
## 13
## EBV & mDLBCL elderly 50-94 years
## 13
## EBV & pDLBCL elderly 50-94 years
## 13
## EBVaNPC nasopharyngeal carcinoma with EBV infection
## 34
## Epstein Barr Virus
## 80
## fibromyalgia
## 15
## Gastric Carcinoma and Peritoneal Metastatic Gastric Carcioma
## 30
## intrahepatic cholangiocarcinoma ICC
## 37
## language Autism
## 16
## Lyme Disease 6 months
## 33
## mild Autism
## 12
## mononucleosis
## 15
## Multiple Sclerosis
## 41
## NKTCL Natural Killer T-Cell Lymphoma & EBV
## 19
## savant Autism
## 14
## uterine fibroid myometrial tissue
## 27
# The Lyme disease top genes currently stored in the pathology database.
isLyme <- pathologies$topGenePathology == "Lyme Disease 6 months"
lyme33 <- pathologies[isLyme, ]
lyme33$Genecards_ID
## [1] "ISG20" "CLEC2L" "PSMF1" "RNF168" "PEX26" "F2"
## [7] "KCNJ16" "MAP2K7" "ESYT1" "GATC" "ENO1" "CYP7B1"
## [13] "IGFALS" "OR52A4" "INAFM1" "DLG3" "TMEM194A" "RGPD3"
## [19] "HPGD" "SLC1A1" "NUDT18" "LOC400657" "OTOS" "HECW1"
## [25] "POU4F2" "FRS3" "PDZRN3" "KHDRBS3" "CENPF" "FAM162A"
## [31] "CABP1" "POU3F2" "CTXN3"
Lets test these genes to see if they are still good predictors of class of Lyme disease.
# Expression rows (Gene + 86 samples) for the stored Lyme genes that are
# present in `data`; genes without a matching symbol are simply absent.
inLyme33 <- data$Gene %in% lyme33$Genecards_ID
lyme33DB <- data[inLyme33, 1:87]
lyme33DB$Gene
## [1] "RNF168" "ISG20" "NUDT18" "ESYT1" "OTOS" "CYP7B1"
## [7] "HECW1" "HPGD" "GATC" "FAM162A" "PSMF1" "POU3F2"
## [13] "PDZRN3" "MAP2K7" "OR52A4" "CENPF" "PEX26" "SLC1A1"
## [19] "DLG3" "POU4F2" "ENO1" "LOC400657" "CLEC2L" "CTXN3"
## [25] "RGPD3" "TMEM194A" "IGFALS" "KCNJ16" "F2" "CABP1"
## [31] "KHDRBS3" "FRS3"
# Transpose to samples-as-rows / genes-as-columns and attach the class label.
lyme33MX <- data.frame(t(lyme33DB[, -1]))
colnames(lyme33MX) <- lyme33DB$Gene
lyme33MX$class <- as.factor(class)
paged_table(lyme33MX)
Now, lets test these genes and see if they are the best set to predict a 4 class model of healthy, acute, 1 month infected, or 6 months infected.
# Random 80/20 split of the 86 samples: 68 rows for training, 18 held out.
set.seed(235)
nSamples <- 86
inTrain <- sample(seq_len(nSamples), floor(0.8 * nSamples))
training <- lyme33MX[inTrain, ]
testing <- lyme33MX[-inTrain, ]
table(training$class)
##
## 1 month infected 6 months infected acute Healthy
## 22 8 24 14
table(testing$class)
##
## 1 month infected 6 months infected acute Healthy
## 5 2 4 7
# Fit a 4-class random forest on the previously stored Lyme gene features.
# - Predictors are every column except `class`, selected by name rather than by
#   the hard-coded positions 1:32.
# - The former `confusion=T` was removed: randomForest() has no such argument
#   (it was silently absorbed by `...`). The out-of-bag confusion matrix is
#   always available as rf33$confusion for classification.
geneCols <- setdiff(colnames(training), "class")
rf33 <- randomForest(training[geneCols], training$class, mtry = 10, ntree = 5000)
rf33$confusion
## 1 month infected 6 months infected acute Healthy class.error
## 1 month infected 14 1 5 2 0.3636364
## 6 months infected 2 5 1 0 0.3750000
## acute 5 0 17 2 0.2916667
## Healthy 6 0 7 1 0.9285714
Compared with the 8-gene model, the out-of-bag results are better for the acute, 1 month infected, and 6 months infected classes (compared with the 52-gene model, only acute and 1 month infected improve), but the healthy class was much worse, with only 1/14 predicted correctly. The healthy samples were misidentified as either acute infection or 1 month infected.
Lets see how well it predicts on the hold out validation 20% data.
# Score the held-out samples with the 32-gene forest and compare to the truth.
# This overwrites the earlier `results` table from the 52-gene model.
predict33 <- predict(rf33, newdata = testing)
results <- data.frame(
  predicted = predict33,
  actual = testing$class
)
results
## predicted actual
## healthyControl_1 1 month infected Healthy
## healthyControl_2 6 months infected Healthy
## healthyControl_3 Healthy Healthy
## healthyControl_7 acute Healthy
## healthyControl_16 acute Healthy
## healthyControl_17 acute Healthy
## healthyControl_18 Healthy Healthy
## acuteLymeDisease_5 acute acute
## acuteLymeDisease_24 acute acute
## acuteLymeDisease_25 acute acute
## acuteLymeDisease_26 acute acute
## Antibodies_1month_4 acute 1 month infected
## Antibodies_1month_7 acute 1 month infected
## Antibodies_1month_14 acute 1 month infected
## Antibodies_1month_23 acute 1 month infected
## Antibodies_1month_24 1 month infected 1 month infected
## Antibodies_6months_2 acute 6 months infected
## Antibodies_6months_4 6 months infected 6 months infected
These genes are also not that great, and they are even worse for the healthy class, although the training model seemed to be OK for the other classes. On the hold-out data: for healthy, 2/7 correct; for acute, 4/4 correct; for 1 month infected, 1/5 correct; and for 6 months infected, 1/2 correct.
Since the 52 genes are better overall, we will use those genes.
lyme33[1,]
## Ensembl_ID Genecards_ID FC_pathology_control topGenePathology
## 96 ENSG00000172183 ISG20 154077.3 Lyme Disease 6 months
## mediaType
## 96 RBCs of PBMCs array format
## studySummarized
## 96 Lyme disease samples had more acute, healthy, and 1 month infection than the chronic infection samples. The blood of peripheral blood mononuclear cells was examined by array and not high throughput analysis.There were 10 chronic cases of 6 months infection after antibiotics, 21 cases of healthy and uninfected, 28 cases of acute infection, and 27 cases of infected 1 month with antibiotics.
## GSE_study_ID
## 96 GSE145974
colnames(lyme33)
## [1] "Ensembl_ID" "Genecards_ID" "FC_pathology_control"
## [4] "topGenePathology" "mediaType" "studySummarized"
## [7] "GSE_study_ID"
colnames(topGenes60_b)
## [1] "Gene" "healthyControl_1" "healthyControl_2"
## [4] "healthyControl_3" "healthyControl_4" "healthyControl_5"
## [7] "healthyControl_6" "healthyControl_7" "healthyControl_8"
## [10] "healthyControl_9" "healthyControl_10" "healthyControl_11"
## [13] "healthyControl_12" "healthyControl_13" "healthyControl_14"
## [16] "healthyControl_15" "healthyControl_16" "healthyControl_17"
## [19] "healthyControl_18" "healthyControl_19" "healthyControl_20"
## [22] "healthyControl_21" "acuteLymeDisease_1" "acuteLymeDisease_2"
## [25] "acuteLymeDisease_3" "acuteLymeDisease_4" "acuteLymeDisease_5"
## [28] "acuteLymeDisease_6" "acuteLymeDisease_7" "acuteLymeDisease_8"
## [31] "acuteLymeDisease_9" "acuteLymeDisease_10" "acuteLymeDisease_11"
## [34] "acuteLymeDisease_12" "acuteLymeDisease_13" "acuteLymeDisease_14"
## [37] "acuteLymeDisease_15" "acuteLymeDisease_16" "acuteLymeDisease_17"
## [40] "acuteLymeDisease_18" "acuteLymeDisease_19" "acuteLymeDisease_20"
## [43] "acuteLymeDisease_21" "acuteLymeDisease_22" "acuteLymeDisease_23"
## [46] "acuteLymeDisease_24" "acuteLymeDisease_25" "acuteLymeDisease_26"
## [49] "acuteLymeDisease_27" "acuteLymeDisease_28" "Antibodies_1month_1"
## [52] "Antibodies_1month_2" "Antibodies_1month_3" "Antibodies_1month_4"
## [55] "Antibodies_1month_5" "Antibodies_1month_6" "Antibodies_1month_7"
## [58] "Antibodies_1month_8" "Antibodies_1month_9" "Antibodies_1month_10"
## [61] "Antibodies_1month_11" "Antibodies_1month_12" "Antibodies_1month_13"
## [64] "Antibodies_1month_14" "Antibodies_1month_15" "Antibodies_1month_16"
## [67] "Antibodies_1month_17" "Antibodies_1month_18" "Antibodies_1month_19"
## [70] "Antibodies_1month_20" "Antibodies_1month_21" "Antibodies_1month_22"
## [73] "Antibodies_1month_23" "Antibodies_1month_24" "Antibodies_1month_25"
## [76] "Antibodies_1month_26" "Antibodies_1month_27" "Antibodies_6months_1"
## [79] "Antibodies_6months_2" "Antibodies_6months_3" "Antibodies_6months_4"
## [82] "Antibodies_6months_5" "Antibodies_6months_6" "Antibodies_6months_7"
## [85] "Antibodies_6months_8" "Antibodies_6months_9" "Antibodies_6months_10"
## [88] "foldchange" "topGeneSource"
# Keep only the gene symbol, its fold change and the source description
# (columns 1, 88 and 89 of topGenes60_b).
lyme52 <- topGenes60_b[, c("Gene", "foldchange", "topGeneSource")]
paged_table(lyme52)
The pathology database columns we need to match are: “Ensembl_ID”, “Genecards_ID”, “FC_pathology_control”, “topGenePathology”, “mediaType”, “studySummarized”, and “GSE_study_ID”.
# Rename the three kept columns to the pathology database names, then copy the
# study-level metadata from the first of the old Lyme rows.
colnames(lyme52) <- c("Genecards_ID", "FC_pathology_control", "studySummarized")
refRow <- lyme33[1, ]
lyme52$topGenePathology <- refRow$topGenePathology
lyme52$mediaType <- refRow$mediaType
lyme52$GSE_study_ID <- refRow$GSE_study_ID
# studySummarized = old study summary + "_" + per-gene source + normalization note
normNote <- "these values were normalized from original data in Excel for max minus min over the range of max-min to remove the negative values, and made the larger values the lower ones and smaller values the higher ones, but the foldchange values are still valid but could be different than up or down as in the opposite."
lyme52$studySummarized <- paste0(
  refRow$studySummarized, "_", lyme52$studySummarized,
  " - Note - ", normNote
)
Lets add the Ensembl IDs from another dataset.
path1 <- 'path to your ensembl IDs'
# Read directly from `path1` with file.path() instead of calling setwd():
# changing the working directory is a global side effect, and with relative
# paths each later setwd() would resolve against the previous one.
ensembl <- read.csv(
  file.path(path1, "GSE271486_ensembleIDs_NPC_LBMP_study.csv"),
  header = TRUE
)
colnames(ensembl)
## [1] "gene_id" "gene_name" "description"
## [4] "locus" "HNE_1_MUT_LMP1_1_count" "HNE_1_MUT_LMP1_2_count"
## [7] "HNE_1_MUT_LMP1_3_count" "HNE_1_WT_LMP1_1_count" "HNE_1_WT_LMP1_2_count"
## [10] "HNE_1_WT_LMP1_3_count" "HNE_1_MUT_LMP1_1_FPKM" "HNE_1_MUT_LMP1_2_FPKM"
## [13] "HNE_1_MUT_LMP1_3_FPKM" "HNE_1_WT_LMP1_1_FPKM" "HNE_1_WT_LMP1_2_FPKM"
## [16] "HNE_1_WT_LMP1_3_FPKM"
We only want columns 1 through 2.
# Only the Ensembl ID and the gene symbol are needed for the lookup.
ensembl2 <- ensembl[, c("gene_id", "gene_name")]
paged_table(head(ensembl2, 10))
Lets merge this to lyme52.
# Attach Ensembl IDs to the top genes by gene symbol.
# all.x = TRUE keeps every row of lyme52: the default inner merge silently
# drops any top gene whose symbol is not in ensembl2, which is how top genes
# went missing from the database before. Unmatched genes get NA in Ensembl_ID.
# NOTE(review): if a symbol occurs more than once in ensembl2$gene_name the
# gene is repeated in the result - confirm gene_name is unique.
lyme52_b <- merge(
  lyme52, ensembl2,
  by.x = "Genecards_ID", by.y = "gene_name",
  all.x = TRUE
)
# Rename by name rather than by position 7 so a column-order change cannot
# relabel the wrong column.
colnames(lyme52_b)[colnames(lyme52_b) == "gene_id"] <- "Ensembl_ID"
colnames(lyme52_b)
## [1] "Genecards_ID" "FC_pathology_control" "studySummarized"
## [4] "topGenePathology" "mediaType" "GSE_study_ID"
## [7] "Ensembl_ID"
colnames(pathologies)
## [1] "Ensembl_ID" "Genecards_ID" "FC_pathology_control"
## [4] "topGenePathology" "mediaType" "studySummarized"
## [7] "GSE_study_ID"
# Reorder the columns into the pathology database layout.
dbLayout <- c(
  "Ensembl_ID", "Genecards_ID", "FC_pathology_control",
  "topGenePathology", "mediaType", "studySummarized", "GSE_study_ID"
)
lyme52_c <- lyme52_b[, dbLayout]
colnames(lyme52_c)
## [1] "Ensembl_ID" "Genecards_ID" "FC_pathology_control"
## [4] "topGenePathology" "mediaType" "studySummarized"
## [7] "GSE_study_ID"
The feature names now match, so the two tables can be combined, but first I want to remove the old top genes for Lyme disease.
# Drop the old Lyme disease rows; which() also drops rows where the
# pathology label is NA, as subset() does.
keepRows <- which(pathologies$topGenePathology != "Lyme Disease 6 months")
Pathology <- pathologies[keepRows, ]
The new pathology database:
# Stack the refreshed Lyme rows beneath the remaining pathologies.
newP <- do.call(rbind, list(Pathology, lyme52_c))
paged_table(newP) # 591 x 7 in the recorded run
Lets write this new pathology database out to the folder of current pathologies.
# Write into the pathologies folder via file.path() instead of setwd(path), so
# the output location does not depend on the current working directory.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
write.csv(
  newP,
  file.path(path, 'pathologyDB_LymeFCsChangesAdded_5_31_2026.csv'),
  row.names = FALSE
)