library(readxl)
#library(summarytools)
require(psych)
require(plyr)
require(dplyr)
require(graphics)
require(ggplot2)
require(svglite)
require(reshape2)
require(IDPmisc)
require(GLDEX)
require(ggfortify)
require(cluster)
require(ggrepel)
require(wesanderson)
require(gridExtra)
require(devtools)
#devtools::install_github("ewenharrison/finalfit")
require(finalfit)
require(table1)
require(MatchIt)
require(plotly)
require(rms)
require(xtable)
require(knitr)
require(corrplot)
require(EnvStats)
library(data.table)
library(stringr)
library(lubridate)
library(tidyr)
library(purrr)
library(readr)
library(tidytext)
library(widyr)
library(ggraph)
library(igraph)
phimat_fun<-function(x) {
xcol<-dim(x)[2]
newx<-matrix(NA,nrow=xcol,ncol=xcol)
for(i in 1:xcol) {
for(j in 1:xcol) {
a = as.numeric(unlist(x[,i]))
b = as.numeric(unlist(x[,j]))
newx[i,j]<-phi(table(a,b))
}
}
rownames(newx)<-colnames(newx)<-colnames(x)
return(newx)
}
# Get lower triangle of the correlation matrix
get_lower_tri<-function(phimat){
phimat[upper.tri(phimat)] <- NA
return(phimat)
}
# Get upper triangle of the correlation matrix
get_upper_tri <- function(phimat){
phimat[lower.tri(phimat)]<- NA
return(phimat)
}
reorder_phimat <- function(phimat){
# Use correlation between variables as distance
dd <- as.dist((1-phimat)/2)
hc <- hclust(dd)
phimat <-phimat[hc$order, hc$order]
}
load("/data/JMICC_cohort/R_workspace/DataClean.RData")
#load("/data/JMICC_cohort/R_workspace/DataModeling.RData")
## 男性全体
pca_diet_dat = subset(cleaned_survey, 性別 ==1 & 塩分制限!=1 & カロリー制限!=1 & 糖分制限!=1 & 脂肪制限!=1, select = c(ご飯, パン, めん, 肉類, 魚類, 油類, 脂類,乳類, 卵類, 野菜類, 豆類, 果実類, 菓子類, 有糖飲料類,コーヒー, 日本緑茶_量))
pca_diet = clara(scale(pca_diet_dat), 2)
autoplot(pca_diet,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "royalblue", loadings.label.size = 5, loadings.colour = "royalblue") + ggtitle("PCA analysis of 食事内容: 好みの傾向(食事制限なし男性)") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("grey", "grey"))+theme(legend.text = element_text(size = 16)) + theme_minimal()
## 女性全体
pca_diet_dat = subset(cleaned_survey, 性別 ==2 & 塩分制限!=1 & カロリー制限!=1 & 糖分制限!=1 & 脂肪制限!=1, select = c(ご飯, パン, めん, 肉類, 魚類, 油類, 脂類,乳類, 卵類, 野菜類, 豆類, 果実類, 菓子類, 有糖飲料類,コーヒー, 日本緑茶_量))
pca_diet = clara(scale(pca_diet_dat), 2)
autoplot(pca_diet,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "rosybrown", loadings.label.size = 5, loadings.colour = "rosybrown") + ggtitle("PCA analysis of 食事内容: 好みの傾向(食事制限なし女性)") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("grey", "grey"))+theme(legend.text = element_text(size = 16)) + theme_minimal()
ggplot(data=cleaned_survey, aes(x=身長_調査票,y=体重_現在, color=as.factor(性別))) + geom_point(alpha=0.2)+
scale_color_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("身長体重") + xlab("身長")
## Warning: Removed 1982 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=開始時年齢,fill=as.factor(性別))) + geom_histogram(alpha=0.5, bins = 35)+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("年齢分布")
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢),y=BMI, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey")+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("BMI") + xlab("開始時年齢")
## Warning: Removed 1982 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢),y=エネルギー, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey")+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("摂取エネルギー") + xlab("開始時年齢")
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢),y=Daily身体活動メッツ, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey")+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("Daily身体活動メッツ") + xlab("開始時年齢")
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=中性脂肪, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("HbA1c")
## Warning: Removed 18111 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HbA1c, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("HbA1c")
## Warning: Removed 31096 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=総コレステロール, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("総コレステロール")
## Warning: Removed 25557 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HDLコレステロール, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("HDLコレステロール")
## Warning: Removed 18106 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GOT, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("GOT") + ylim(c(0,250))
## Warning: Removed 18118 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GPT, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("GPT") + ylim(c(0,500))
## Warning: Removed 18110 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=γGTP, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("γGTP") + ylim(c(0,500))
## Warning: Removed 19115 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=尿酸, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("尿酸")
## Warning: Removed 22549 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=クレアチニン, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("クレアチニン")+ ylim(c(0,10))
## Warning: Removed 20141 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=収縮期血圧, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("収縮期血圧")
## Warning: Removed 19694 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=拡張期血圧, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("拡張期血圧")
## Warning: Removed 19736 rows containing non-finite values (stat_boxplot).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=中性脂肪, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,2000))
## Warning: Removed 22884 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HbA1c, color=as.factor(糖尿病))) + geom_point(size=2) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 35817 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=総コレステロール, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 30311 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HDLコレステロール, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 22876 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GOT, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,250))
## Warning: Removed 22888 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GPT, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,500))
## Warning: Removed 22880 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=γGTP, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,500))
## Warning: Removed 23865 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=尿酸, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 27316 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=クレアチニン, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,10))
## Warning: Removed 24896 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=収縮期血圧, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")+ xlab("年齢")
## Warning: Removed 24463 rows containing missing values (geom_point).
ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=拡張期血圧, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 24505 rows containing missing values (geom_point).
biomarker_dat = cleaned_survey %>%
select(性別, 開始時年齢, 中性脂肪 , HbA1c , 総コレステロール , GOT , GPT , γGTP , 尿酸 , クレアチニン , 収縮期血圧 , 拡張期血圧) %>%
na.omit()
biomarker_dat$中性脂肪[biomarker_dat$中性脂肪 > 2000] = NA
biomarker_dat$GOT[biomarker_dat$GOT > 250] = NA
biomarker_dat$GPT[biomarker_dat$GPT > 500] = NA
biomarker_dat$γGTP[biomarker_dat$γGTP > 500] = NA
biomarker_dat$クレアチニン[biomarker_dat$クレアチニン > 10] = NA
biomarker_male = biomarker_dat %>% na.omit() %>% filter(性別==1, 開始時年齢>=35) %>% select(-c(性別,開始時年齢))
biomarker_pca_male = clara(scale(biomarker_male), 2)
autoplot(biomarker_pca_male,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "royalblue", loadings.label.size = 5, loadings.colour = "royalblue") + theme_minimal() +
ggtitle("PCA analysis of Biomarker") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("slategray1", "slategray1"))+theme(legend.text = element_text(size = 16)) + theme_minimal()
biomarker_female = biomarker_dat %>% na.omit() %>% filter(性別==2, 開始時年齢>=35 &開始時年齢<=45) %>% select(-c(性別,開始時年齢))
biomarker_pca_female = clara(scale(biomarker_female), 2)
autoplot(biomarker_pca_female,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "palevioletred", loadings.label.size = 5, loadings.colour = "palevioletred") + theme_minimal() +
ggtitle("PCA analysis of Biomarkers") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("seashell", "seashell"))+theme(legend.text = element_text(size = 16)) + theme_minimal()
revalue_disease = function(x){
if(is.na(x)){x=0}
else{
x = as.character(x)
x = revalue(x, c("1"=0, "2"=1, "3"=1, "4"=0, "9" =0))
x = as.numeric(x)
}}
disease_freq = cleaned_survey %>%
select(胃かいよう,十二指腸かいよう,慢性胃炎,大腸ポリープ,B型肝炎,C型肝炎,肝硬変,脂肪肝,結核,気管支喘息,慢性気管支炎,糖尿病,
高脂血症,高血圧,狭心症_心筋梗塞,脳卒中,乳腺症,卵巣の病気,卵巣切除,子宮内膜異型増殖症,子宮の病気,子宮切除,がん1)
disease_freq[which(disease_freq == 1, arr.ind = T)] = 0
disease_freq[which(disease_freq == 2, arr.ind = T)] = 1
disease_freq[which(disease_freq == 3, arr.ind = T)] = 1
disease_freq[which(disease_freq == 4, arr.ind = T)] = 0
disease_freq[which(disease_freq == 9, arr.ind = T)] = 0
disease_freq = cbind(subset(cleaned_survey, select=c(性別, 開始時年齢)), disease_freq)
disease_freq = disease_freq %>%
mutate(ag_group = factor(開始時年齢 > 55, labels = c("Below 55", "Above 55")), sex =factor(性別, labels = c("Male", "Female")))
my.render.cont <- function(x) {
x = x*100
with(stats.apply.rounding(stats.default(x), digits=2), c("% "=sprintf("%s %%", MEAN)))
}
table1(data=disease_freq, ~胃かいよう + 十二指腸かいよう + 慢性胃炎 + 大腸ポリープ + B型肝炎 + C型肝炎 + 肝硬変 + 脂肪肝 + 結核 + 気管支喘息 + 慢性気管支炎 + 糖尿病 +
高脂血症 + 高血圧 + 狭心症_心筋梗塞 + 脳卒中+ がん1 |ag_group*sex, render.continuous = my.render.cont, topclass = "Rtable1-zebra", overall=F)
Below 55 |
Above 55 |
|||
|---|---|---|---|---|
| Male (n=17217) |
Female (n=25168) |
Male (n=23665) |
Female (n=26562) |
|
| 胃かいよう | 14 % | 8.6 % | 22 % | 11 % |
| Missing | 3112 (18.1%) | 4963 (19.7%) | 5697 (24.1%) | 7740 (29.1%) |
| 十二指腸かいよう | 12 % | 5.6 % | 16 % | 6.9 % |
| Missing | 2705 (15.7%) | 4435 (17.6%) | 5464 (23.1%) | 7475 (28.1%) |
| 慢性胃炎 | 9.8 % | 11 % | 16 % | 14 % |
| Missing | 3128 (18.2%) | 4976 (19.8%) | 5724 (24.2%) | 7765 (29.2%) |
| 大腸ポリープ | 7.0 % | 3.0 % | 20 % | 8.8 % |
| Missing | 2709 (15.7%) | 4441 (17.6%) | 5464 (23.1%) | 7476 (28.1%) |
| B型肝炎 | 1.4 % | 0.98 % | 2.0 % | 1.3 % |
| Missing | 3106 (18.0%) | 4954 (19.7%) | 5700 (24.1%) | 7753 (29.2%) |
| C型肝炎 | 0.77 % | 0.61 % | 1.9 % | 1.5 % |
| Missing | 3112 (18.1%) | 4955 (19.7%) | 5699 (24.1%) | 7744 (29.2%) |
| 肝硬変 | 0.26 % | 0.11 % | 0.69 % | 0.25 % |
| Missing | 926 (5.4%) | 1131 (4.5%) | 2136 (9.0%) | 2525 (9.5%) |
| 脂肪肝 | 16 % | 4.2 % | 13 % | 7.5 % |
| Missing | 3148 (18.3%) | 4976 (19.8%) | 5746 (24.3%) | 7788 (29.3%) |
| 結核 | 1.1 % | 1.0 % | 3.8 % | 2.7 % |
| Missing | 2702 (15.7%) | 4441 (17.6%) | 5456 (23.1%) | 7471 (28.1%) |
| 気管支喘息 | 6.7 % | 7.5 % | 4.7 % | 5.8 % |
| Missing | 497 (2.9%) | 589 (2.3%) | 1843 (7.8%) | 2173 (8.2%) |
| 慢性気管支炎 | 1.8 % | 2.4 % | 2.2 % | 3.0 % |
| Missing | 3117 (18.1%) | 4956 (19.7%) | 5705 (24.1%) | 7760 (29.2%) |
| 糖尿病 | 4.7 % | 1.4 % | 14 % | 5.7 % |
| Missing | 487 (2.8%) | 599 (2.4%) | 1627 (6.9%) | 2141 (8.1%) |
| 高脂血症 | 13 % | 6.7 % | 19 % | 25 % |
| Missing | 487 (2.8%) | 577 (2.3%) | 1695 (7.2%) | 1789 (6.7%) |
| 高血圧 | 14 % | 6.7 % | 34 % | 26 % |
| Missing | 432 (2.5%) | 533 (2.1%) | 1302 (5.5%) | 1670 (6.3%) |
| 狭心症_心筋梗塞 | 1.6 % | 0.57 % | 6.2 % | 3.4 % |
| Missing | 909 (5.3%) | 1132 (4.5%) | 2035 (8.6%) | 2471 (9.3%) |
| 脳卒中 | 0.90 % | 0.55 % | 3.6 % | 2.0 % |
| Missing | 527 (3.1%) | 634 (2.5%) | 1851 (7.8%) | 2258 (8.5%) |
| がん1 | 3.4 % | 4.6 % | 10 % | 7.2 % |
table1(data=subset(disease_freq,性別 ==2), ~ 乳腺症 + 卵巣の病気 + 卵巣切除 + 子宮内膜異型増殖症 + 子宮の病気 + 子宮切除 |ag_group, render.continuous = my.render.cont, topclass = "Rtable1-zebra", overall=F)
| Below 55 (n=25168) |
Above 55 (n=26562) |
|
|---|---|---|
| 乳腺症 | 16 % | 15 % |
| Missing | 5027 (20.0%) | 7800 (29.4%) |
| 卵巣の病気 | 7.0 % | 8.8 % |
| Missing | 4995 (19.8%) | 7824 (29.5%) |
| 卵巣切除 | 87 % | 82 % |
| Missing | 19377 (77.0%) | 17487 (65.8%) |
| 子宮内膜異型増殖症 | 3.5 % | 2.8 % |
| Missing | 5115 (20.3%) | 7953 (29.9%) |
| 子宮の病気 | 13 % | 17 % |
| Missing | 5035 (20.0%) | 7863 (29.6%) |
| 子宮切除 | 81 % | 55 % |
| Missing | 19468 (77.4%) | 19668 (74.0%) |
## Male
fam_diabetes_male = cleaned_survey %>%
filter(性別==1)%>%
select(糖尿病,父親_糖尿病, 母親_糖尿病) %>%
mutate(息子 = 糖尿病==2 | 糖尿病==3, 父=父親_糖尿病==3, 母=母親_糖尿病==3) %>%
select(-c(糖尿病,父親_糖尿病, 母親_糖尿病))
phimat = round(phimat_fun(fam_diabetes_male),2)
# Reorder the Phi correlation matrix
phimat <- reorder_phimat(phimat)
upper_tri <- get_upper_tri(phimat)
# Melt the correlation matrix
melted_phimat <- melt(upper_tri, na.rm = TRUE)
## Warning in melt(upper_tri, na.rm = TRUE): The melt generic in data.table
## has been passed a matrix and will attempt to redirect to the relevant
## reshape2 method; please note that reshape2 is deprecated, and this
## redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend
## the namespace like reshape2::melt(upper_tri). In the next version, this
## warning will become an error.
# Create a ggheatmap
ggheatmap <- ggplot(melted_phimat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "seagreen", high = "salmon", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Phi\nCorrelation") +
# theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 0, vjust = 1,
size = 16, hjust = 1), axis.text.y = element_text(angle = 0, vjust = 1,
size = 16, hjust = 1))+
coord_fixed()
# Print the heatmap
#print(ggheatmap)
ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 8) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))+
ggtitle("糖尿病の家族相関性:息子の場合")
## female
fam_diabetes_female = cleaned_survey %>%
filter(性別==2)%>%
select(糖尿病,父親_糖尿病, 母親_糖尿病) %>%
mutate(娘 = 糖尿病==2 | 糖尿病==3, 父=父親_糖尿病==3, 母=母親_糖尿病==3) %>%
select(-c(糖尿病,父親_糖尿病, 母親_糖尿病))
phimat = round(phimat_fun(fam_diabetes_female),2)
# Reorder the Phi correlation matrix
phimat <- reorder_phimat(phimat)
upper_tri <- get_upper_tri(phimat)
# Melt the correlation matrix
melted_phimat <- melt(upper_tri, na.rm = TRUE)
## Warning in melt(upper_tri, na.rm = TRUE): The melt generic in data.table
## has been passed a matrix and will attempt to redirect to the relevant
## reshape2 method; please note that reshape2 is deprecated, and this
## redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend
## the namespace like reshape2::melt(upper_tri). In the next version, this
## warning will become an error.
# Create a ggheatmap
ggheatmap <- ggplot(melted_phimat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "seagreen", high = "salmon", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Phi\nCorrelation") +
# theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 0, vjust = 1,
size = 16, hjust = 1), axis.text.y = element_text(angle = 0, vjust = 1,
size = 16, hjust = 1))+
coord_fixed()
# Print the heatmap
#print(ggheatmap)
ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 8) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))+
ggtitle("糖尿病の家族相関性:娘の場合")