Load libraries

library(readxl)
#library(summarytools)
require(psych) 
require(plyr)
require(dplyr)
require(graphics)
require(ggplot2)
require(svglite)
require(reshape2)
require(IDPmisc)
require(GLDEX)
require(ggfortify)
require(cluster)
require(ggrepel)
require(wesanderson)
require(gridExtra)
require(devtools)
#devtools::install_github("ewenharrison/finalfit")
require(finalfit)
require(table1)
require(MatchIt)
require(plotly)
require(rms)
require(xtable)
require(knitr)
require(corrplot)
require(EnvStats)

library(data.table)
library(stringr)
library(lubridate)


library(tidyr)
library(purrr)
library(readr)


library(tidytext)
library(widyr)

library(ggraph)
library(igraph)

define phi matrix functions

phimat_fun<-function(x) { 
 xcol<-dim(x)[2] 
 newx<-matrix(NA,nrow=xcol,ncol=xcol) 
 for(i in 1:xcol) { 
  
  for(j in 1:xcol) {
    a = as.numeric(unlist(x[,i]))
    b = as.numeric(unlist(x[,j]))
    newx[i,j]<-phi(table(a,b)) 
  }
 } 
 rownames(newx)<-colnames(newx)<-colnames(x) 
 return(newx) 
} 



# Get lower triangle of the correlation matrix
  get_lower_tri<-function(phimat){
    phimat[upper.tri(phimat)] <- NA
    return(phimat)
  }
  # Get upper triangle of the correlation matrix
  get_upper_tri <- function(phimat){
    phimat[lower.tri(phimat)]<- NA
    return(phimat)
  }
  


reorder_phimat <- function(phimat){
# Use correlation between variables as distance
dd <- as.dist((1-phimat)/2)
hc <- hclust(dd)
phimat <-phimat[hc$order, hc$order]
}
load("/data/JMICC_cohort/R_workspace/DataClean.RData")
#load("/data/JMICC_cohort/R_workspace/DataModeling.RData")

PCA analysis of 食事内容: 好みの傾向

##  男性全体
pca_diet_dat = subset(cleaned_survey, 性別 ==1 & 塩分制限!=1 & カロリー制限!=1 & 糖分制限!=1 & 脂肪制限!=1, select = c(ご飯, パン, めん,  肉類, 魚類, 油類, 脂類,乳類, 卵類, 野菜類, 豆類, 果実類,  菓子類, 有糖飲料類,コーヒー, 日本緑茶_量))

pca_diet = clara(scale(pca_diet_dat), 2)
autoplot(pca_diet,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "royalblue", loadings.label.size  = 5, loadings.colour = "royalblue") + ggtitle("PCA analysis of 食事内容: 好みの傾向(食事制限なし男性)") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("grey", "grey"))+theme(legend.text = element_text(size = 16)) + theme_minimal()

##  女性全体
pca_diet_dat = subset(cleaned_survey, 性別 ==2 & 塩分制限!=1 & カロリー制限!=1 & 糖分制限!=1 & 脂肪制限!=1, select = c(ご飯, パン, めん,  肉類, 魚類, 油類, 脂類,乳類, 卵類, 野菜類, 豆類, 果実類,  菓子類, 有糖飲料類,コーヒー, 日本緑茶_量))

pca_diet = clara(scale(pca_diet_dat), 2)
autoplot(pca_diet,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "rosybrown", loadings.label.size  = 5, loadings.colour = "rosybrown") + ggtitle("PCA analysis of 食事内容: 好みの傾向(食事制限なし女性)") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("grey", "grey"))+theme(legend.text = element_text(size = 16)) + theme_minimal()

Basic biographic information plot

ggplot(data=cleaned_survey, aes(x=身長_調査票,y=体重_現在, color=as.factor(性別))) + geom_point(alpha=0.2)+ 
  scale_color_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("身長体重") + xlab("身長")
## Warning: Removed 1982 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=開始時年齢,fill=as.factor(性別))) + geom_histogram(alpha=0.5, bins = 35)+ 
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("年齢分布") 

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢),y=BMI, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey")+ 
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("BMI") + xlab("開始時年齢")
## Warning: Removed 1982 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢),y=エネルギー, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey")+ 
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("摂取エネルギー") + xlab("開始時年齢")

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢),y=Daily身体活動メッツ, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey")+ 
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"),values=c("royalblue", "palevioletred")) + theme_minimal() + facet_grid(~性別) + ggtitle("Daily身体活動メッツ") + xlab("開始時年齢")

Biomarker plots

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=中性脂肪, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("HbA1c")
## Warning: Removed 18111 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HbA1c, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("HbA1c")
## Warning: Removed 31096 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=総コレステロール, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("総コレステロール")
## Warning: Removed 25557 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HDLコレステロール, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("HDLコレステロール")
## Warning: Removed 18106 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GOT, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("GOT") + ylim(c(0,250))
## Warning: Removed 18118 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GPT, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("GPT") + ylim(c(0,500))
## Warning: Removed 18110 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=γGTP, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("γGTP") + ylim(c(0,500))
## Warning: Removed 19115 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=尿酸, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("尿酸")
## Warning: Removed 22549 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=クレアチニン, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢")  + ggtitle("クレアチニン")+ ylim(c(0,10))
## Warning: Removed 20141 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=収縮期血圧, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("収縮期血圧")
## Warning: Removed 19694 rows containing non-finite values (stat_boxplot).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=拡張期血圧, fill=as.factor(性別))) + geom_boxplot(alpha=0.7, color="lightgrey") + facet_grid(~性別)+ theme_minimal()+
  scale_fill_manual(name="性別", labels=c("男性","女性"), breaks = c("1", "2"), values=c("royalblue", "palevioletred"))+ xlab("年齢") + ggtitle("拡張期血圧")
## Warning: Removed 19736 rows containing non-finite values (stat_boxplot).

Biomarker and diabetes plots

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=中性脂肪, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,2000))
## Warning: Removed 22884 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HbA1c, color=as.factor(糖尿病))) + geom_point(size=2) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") 
## Warning: Removed 35817 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=総コレステロール, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") 
## Warning: Removed 30311 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=HDLコレステロール, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") 
## Warning: Removed 22876 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GOT, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")  + ylim(c(0,250))
## Warning: Removed 22888 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=GPT, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,500))
## Warning: Removed 22880 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=γGTP, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,500))
## Warning: Removed 23865 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=尿酸, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 27316 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=クレアチニン, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢") + ylim(c(0,10))
## Warning: Removed 24896 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=収縮期血圧, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")+ xlab("年齢")
## Warning: Removed 24463 rows containing missing values (geom_point).

ggplot(data=cleaned_survey, aes(x=as.factor(開始時年齢), y=拡張期血圧, color=as.factor(糖尿病))) + geom_point(size=0.5) + facet_grid(~性別)+ theme_minimal()+
  scale_color_manual(name="糖尿病", labels=c("なし","かかっている","かかったことある"), breaks = c("1", "2", "3"), values=c("lightgrey", "salmon", "salmon"))+ xlab("年齢")
## Warning: Removed 24505 rows containing missing values (geom_point).

biomarker_dat = cleaned_survey %>%
  select(性別, 開始時年齢, 中性脂肪 , HbA1c , 総コレステロール , GOT , GPT , γGTP , 尿酸 , クレアチニン , 収縮期血圧 , 拡張期血圧) %>%
  na.omit()

biomarker_dat$中性脂肪[biomarker_dat$中性脂肪 > 2000] = NA
biomarker_dat$GOT[biomarker_dat$GOT > 250] = NA
biomarker_dat$GPT[biomarker_dat$GPT > 500] = NA
biomarker_dat$γGTP[biomarker_dat$γGTP > 500] = NA
biomarker_dat$クレアチニン[biomarker_dat$クレアチニン > 10] = NA




biomarker_male = biomarker_dat %>% na.omit() %>% filter(性別==1, 開始時年齢>=35)  %>% select(-c(性別,開始時年齢))

biomarker_pca_male = clara(scale(biomarker_male), 2)
autoplot(biomarker_pca_male,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "royalblue", loadings.label.size  = 5, loadings.colour = "royalblue") + theme_minimal() +
  ggtitle("PCA analysis of Biomarker") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("slategray1", "slategray1"))+theme(legend.text = element_text(size = 16)) + theme_minimal()

biomarker_female = biomarker_dat %>% na.omit() %>% filter(性別==2, 開始時年齢>=35 &開始時年齢<=45) %>% select(-c(性別,開始時年齢))

biomarker_pca_female = clara(scale(biomarker_female), 2)
autoplot(biomarker_pca_female,loadings=TRUE, loadings.label = TRUE, loadings.label.colour = "palevioletred", loadings.label.size  = 5, loadings.colour = "palevioletred") + theme_minimal() +
  ggtitle("PCA analysis of Biomarkers") +scale_color_manual(name="", labels=c("",""), breaks = c("1", "2"), values=c("seashell", "seashell"))+theme(legend.text = element_text(size = 16)) + theme_minimal()

list of diseases

revalue_disease = function(x){
  if(is.na(x)){x=0}
  else{
    x = as.character(x)
  x = revalue(x, c("1"=0, "2"=1, "3"=1, "4"=0, "9" =0)) 
  x = as.numeric(x)
}}
  

disease_freq = cleaned_survey %>%
  select(胃かいよう,十二指腸かいよう,慢性胃炎,大腸ポリープ,B型肝炎,C型肝炎,肝硬変,脂肪肝,結核,気管支喘息,慢性気管支炎,糖尿病,
           高脂血症,高血圧,狭心症_心筋梗塞,脳卒中,乳腺症,卵巣の病気,卵巣切除,子宮内膜異型増殖症,子宮の病気,子宮切除,がん1)

disease_freq[which(disease_freq == 1, arr.ind = T)] = 0
disease_freq[which(disease_freq == 2, arr.ind = T)] = 1
disease_freq[which(disease_freq == 3, arr.ind = T)] = 1
disease_freq[which(disease_freq == 4, arr.ind = T)] = 0
disease_freq[which(disease_freq == 9, arr.ind = T)] = 0



disease_freq = cbind(subset(cleaned_survey, select=c(性別, 開始時年齢)), disease_freq)  

disease_freq = disease_freq %>% 
  mutate(ag_group = factor(開始時年齢 > 55, labels = c("Below 55", "Above 55")), sex =factor(性別, labels = c("Male", "Female"))) 
  

my.render.cont <- function(x) {
  x = x*100
    with(stats.apply.rounding(stats.default(x), digits=2), c("% "=sprintf("%s %%", MEAN)))
}


table1(data=disease_freq, ~胃かいよう + 十二指腸かいよう + 慢性胃炎 + 大腸ポリープ + B型肝炎 + C型肝炎 + 肝硬変 + 脂肪肝 + 結核 + 気管支喘息 + 慢性気管支炎 + 糖尿病 + 
           高脂血症 + 高血圧 + 狭心症_心筋梗塞 + 脳卒中+ がん1 |ag_group*sex, render.continuous = my.render.cont, topclass = "Rtable1-zebra", overall=F)
Below 55
Above 55
Male
(n=17217)
Female
(n=25168)
Male
(n=23665)
Female
(n=26562)
胃かいよう 14 % 8.6 % 22 % 11 %
Missing 3112 (18.1%) 4963 (19.7%) 5697 (24.1%) 7740 (29.1%)
十二指腸かいよう 12 % 5.6 % 16 % 6.9 %
Missing 2705 (15.7%) 4435 (17.6%) 5464 (23.1%) 7475 (28.1%)
慢性胃炎 9.8 % 11 % 16 % 14 %
Missing 3128 (18.2%) 4976 (19.8%) 5724 (24.2%) 7765 (29.2%)
大腸ポリープ 7.0 % 3.0 % 20 % 8.8 %
Missing 2709 (15.7%) 4441 (17.6%) 5464 (23.1%) 7476 (28.1%)
B型肝炎 1.4 % 0.98 % 2.0 % 1.3 %
Missing 3106 (18.0%) 4954 (19.7%) 5700 (24.1%) 7753 (29.2%)
C型肝炎 0.77 % 0.61 % 1.9 % 1.5 %
Missing 3112 (18.1%) 4955 (19.7%) 5699 (24.1%) 7744 (29.2%)
肝硬変 0.26 % 0.11 % 0.69 % 0.25 %
Missing 926 (5.4%) 1131 (4.5%) 2136 (9.0%) 2525 (9.5%)
脂肪肝 16 % 4.2 % 13 % 7.5 %
Missing 3148 (18.3%) 4976 (19.8%) 5746 (24.3%) 7788 (29.3%)
結核 1.1 % 1.0 % 3.8 % 2.7 %
Missing 2702 (15.7%) 4441 (17.6%) 5456 (23.1%) 7471 (28.1%)
気管支喘息 6.7 % 7.5 % 4.7 % 5.8 %
Missing 497 (2.9%) 589 (2.3%) 1843 (7.8%) 2173 (8.2%)
慢性気管支炎 1.8 % 2.4 % 2.2 % 3.0 %
Missing 3117 (18.1%) 4956 (19.7%) 5705 (24.1%) 7760 (29.2%)
糖尿病 4.7 % 1.4 % 14 % 5.7 %
Missing 487 (2.8%) 599 (2.4%) 1627 (6.9%) 2141 (8.1%)
高脂血症 13 % 6.7 % 19 % 25 %
Missing 487 (2.8%) 577 (2.3%) 1695 (7.2%) 1789 (6.7%)
高血圧 14 % 6.7 % 34 % 26 %
Missing 432 (2.5%) 533 (2.1%) 1302 (5.5%) 1670 (6.3%)
狭心症_心筋梗塞 1.6 % 0.57 % 6.2 % 3.4 %
Missing 909 (5.3%) 1132 (4.5%) 2035 (8.6%) 2471 (9.3%)
脳卒中 0.90 % 0.55 % 3.6 % 2.0 %
Missing 527 (3.1%) 634 (2.5%) 1851 (7.8%) 2258 (8.5%)
がん1 3.4 % 4.6 % 10 % 7.2 %
table1(data=subset(disease_freq,性別 ==2), ~ 乳腺症 + 卵巣の病気 + 卵巣切除 + 子宮内膜異型増殖症 + 子宮の病気 + 子宮切除 |ag_group,  render.continuous = my.render.cont, topclass = "Rtable1-zebra", overall=F)
Below 55
(n=25168)
Above 55
(n=26562)
乳腺症 16 % 15 %
Missing 5027 (20.0%) 7800 (29.4%)
卵巣の病気 7.0 % 8.8 %
Missing 4995 (19.8%) 7824 (29.5%)
卵巣切除 87 % 82 %
Missing 19377 (77.0%) 17487 (65.8%)
子宮内膜異型増殖症 3.5 % 2.8 %
Missing 5115 (20.3%) 7953 (29.9%)
子宮の病気 13 % 17 %
Missing 5035 (20.0%) 7863 (29.6%)
子宮切除 81 % 55 %
Missing 19468 (77.4%) 19668 (74.0%)

Family correlation for diabetes - Male

## Male
fam_diabetes_male = cleaned_survey %>%
  filter(性別==1)%>%
  select(糖尿病,父親_糖尿病, 母親_糖尿病) %>%
  mutate(息子 = 糖尿病==2 | 糖尿病==3, 父=父親_糖尿病==3, 母=母親_糖尿病==3) %>%
  select(-c(糖尿病,父親_糖尿病, 母親_糖尿病))

phimat = round(phimat_fun(fam_diabetes_male),2) 

# Reorder the Phi correlation matrix
phimat <- reorder_phimat(phimat)
upper_tri <- get_upper_tri(phimat)
# Melt the correlation matrix
melted_phimat <- melt(upper_tri, na.rm = TRUE)
## Warning in melt(upper_tri, na.rm = TRUE): The melt generic in data.table
## has been passed a matrix and will attempt to redirect to the relevant
## reshape2 method; please note that reshape2 is deprecated, and this
## redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend
## the namespace like reshape2::melt(upper_tri). In the next version, this
## warning will become an error.
# Create a ggheatmap
ggheatmap <- ggplot(melted_phimat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "seagreen", high = "salmon", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
    name="Phi\nCorrelation") +
#  theme_minimal()+ # minimal theme
 theme(axis.text.x = element_text(angle = 0, vjust = 1, 
    size = 16, hjust = 1), axis.text.y = element_text(angle = 0, vjust = 1, 
    size = 16, hjust = 1))+
 coord_fixed()
# Print the heatmap
#print(ggheatmap)
ggheatmap + 
geom_text(aes(Var2, Var1, label = value), color = "black", size = 8) +
theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_rect(colour = "black", fill=NA, size=2),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  
  legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                title.position = "top", title.hjust = 0.5))+
ggtitle("糖尿病の家族相関性:息子の場合")

Family correlation for diabetes - Female

## female
fam_diabetes_female = cleaned_survey %>%
  filter(性別==2)%>%
  select(糖尿病,父親_糖尿病, 母親_糖尿病) %>%
  mutate(娘 = 糖尿病==2 | 糖尿病==3, 父=父親_糖尿病==3, 母=母親_糖尿病==3) %>%
  select(-c(糖尿病,父親_糖尿病, 母親_糖尿病))

phimat = round(phimat_fun(fam_diabetes_female),2) 

# Reorder the Phi correlation matrix
phimat <- reorder_phimat(phimat)
upper_tri <- get_upper_tri(phimat)
# Melt the correlation matrix
melted_phimat <- melt(upper_tri, na.rm = TRUE)
## Warning in melt(upper_tri, na.rm = TRUE): The melt generic in data.table
## has been passed a matrix and will attempt to redirect to the relevant
## reshape2 method; please note that reshape2 is deprecated, and this
## redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend
## the namespace like reshape2::melt(upper_tri). In the next version, this
## warning will become an error.
# Create a ggheatmap
ggheatmap <- ggplot(melted_phimat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "seagreen", high = "salmon", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
    name="Phi\nCorrelation") +
#  theme_minimal()+ # minimal theme
 theme(axis.text.x = element_text(angle = 0, vjust = 1, 
    size = 16, hjust = 1), axis.text.y = element_text(angle = 0, vjust = 1, 
    size = 16, hjust = 1))+
 coord_fixed()
# Print the heatmap
#print(ggheatmap)
ggheatmap + 
geom_text(aes(Var2, Var1, label = value), color = "black", size = 8) +
theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_rect(colour = "black", fill=NA, size=2),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  
  legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                title.position = "top", title.hjust = 0.5))+
ggtitle("糖尿病の家族相関性:娘の場合")