pacman::p_load(pacman, psych, dplyr, GGally, ggplot2, ggthemes,
ggvis, httr, lubridate, plotly, rio, sasLM, rmarkdown, shiny,
stringr, tidyr, MASS, datawizard, car, corrplot, moonBook, data.table)
#데이터 가져오고, 각 excel sheet 에 있는 자료를 각각 다른 변수로 저장해두자.
#2. 이제 Summary Table 먼저 만들어보자. head(STATAData)
T1 = mytable(~ age+sex+location+pathology, data = STATAData) ## 이mytable function 쓰면 summary table 매우 쉽게 만들어줌
print(T1)
##
## Descriptive Statistics
## ————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
## Mean ± SD or % N Missing (%)
## ————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
## age 44.7 ± 16.5 23 0 ( 0.0%)
## sex 23 0 ( 0.0%)
## - F 9 (39.1%)
## - M 14 (60.9%)
## location 23 0 ( 0.0%)
## - Acetabulum 2 (8.7%)
## - Femur 6 (26.1%)
## - Fibula 1 (4.3%)
## - Frontal sinus 1 (4.3%)
## - Humerus 1 (4.3%)
## - Ischium 1 (4.3%)
## - Mandible 1 (4.3%)
## - Maxilla 3 (13.0%)
## - Sphenoid 1 (4.3%)
## - Spine 2 (8.7%)
## - Tibia 4 (17.4%)
## pathology 23 0 ( 0.0%)
## - Bland looking spindle cell proliferation with calcification 2 (8.7%)
## - hemangiopericytoma 1 (4.3%)
## - High grade mesenchymal tumor (vimentin, S-100 positive, cytokeratin, GFAP negative), most likely mesenchymal chondrosarcoma 1 (4.3%)
## - oncogenic osteomalacia 2 (8.7%)
## - osteoblastoma 1 (4.3%)
## - PMT 12 (52.2%)
## - secondary osteosarcoma 1 (4.3%)
## - short spindle cell proliferation 2 (8.7%)
## - Unspecified 1 (4.3%)
## ————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
T2 = mytable (sex~age+location+pathology, data= STATAData)
print(T2)
##
## Descriptive Statistics by 'sex'
## ——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
## F M p
## (N=9) (N=14)
## ——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
## age 48.7 ± 17.2 42.1 ± 16.1 0.360
## location 0.323
## - Acetabulum 0 ( 0.0%) 2 (14.3%)
## - Femur 2 (22.2%) 4 (28.6%)
## - Fibula 0 ( 0.0%) 1 ( 7.1%)
## - Frontal sinus 0 ( 0.0%) 1 ( 7.1%)
## - Humerus 1 (11.1%) 0 ( 0.0%)
## - Ischium 0 ( 0.0%) 1 ( 7.1%)
## - Mandible 0 ( 0.0%) 1 ( 7.1%)
## - Maxilla 2 (22.2%) 1 ( 7.1%)
## - Sphenoid 1 (11.1%) 0 ( 0.0%)
## - Spine 2 (22.2%) 0 ( 0.0%)
## - Tibia 1 (11.1%) 3 (21.4%)
## pathology 0.372
## - Bland looking spindle cell proliferation with calcification 0 ( 0.0%) 2 (14.3%)
## - hemangiopericytoma 0 ( 0.0%) 1 ( 7.1%)
## - High grade mesenchymal tumor (vimentin, S-100 positive, cytokeratin, GFAP negative), most likely mesenchymal chondrosarcoma 1 (11.1%) 0 ( 0.0%)
## - oncogenic osteomalacia 2 (22.2%) 0 ( 0.0%)
## - osteoblastoma 0 ( 0.0%) 1 ( 7.1%)
## - PMT 5 (55.6%) 7 (50.0%)
## - secondary osteosarcoma 0 ( 0.0%) 1 ( 7.1%)
## - short spindle cell proliferation 1 (11.1%) 1 ( 7.1%)
## - Unspecified 0 ( 0.0%) 1 ( 7.1%)
## ——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
#3. 며칠만에 정상화됐는지 P & ALP Boxplot 으로 그려보자. ## 구해준 데이터갖고 quantile plot 그려보자.
DT <- as.data.table(STATAData[, c("id","post_p", "post_ALP")])
df_long <- melt(DT, id.vars = "id", measure.vars = c("post_p","post_ALP"), variable.name = "Measurement", value.name = "Value") ##이렇게 형식 변환해야 boxplot 그릴 수 있음.
df_long
## id Measurement Value
## <num> <fctr> <num>
## 1: 1 post_p 52
## 2: 3 post_p 14
## 3: 4 post_p 1
## 4: 5 post_p 3
## 5: 6 post_p 0
## 6: 7 post_p 9
## 7: 8 post_p 13
## 8: 10 post_p 1
## 9: 11 post_p 13
## 10: 12 post_p 25
## 11: 13 post_p 15
## 12: 14 post_p 1
## 13: 15 post_p 458
## 14: 16 post_p 2
## 15: 17 post_p 2
## 16: 18 post_p 19
## 17: 19 post_p 3
## 18: 20 post_p 14
## 19: 21 post_p 4
## 20: 22 post_p 367
## 21: 23 post_p NA
## 22: 24 post_p 15
## 23: 25 post_p NA
## 24: 1 post_ALP 52
## 25: 3 post_ALP 227
## 26: 4 post_ALP 1
## 27: 5 post_ALP 1
## 28: 6 post_ALP 1
## 29: 7 post_ALP 271
## 30: 8 post_ALP 13
## 31: 10 post_ALP 302
## 32: 11 post_ALP 190
## 33: 12 post_ALP 700
## 34: 13 post_ALP 184
## 35: 14 post_ALP 615
## 36: 15 post_ALP 492
## 37: 16 post_ALP 0
## 38: 17 post_ALP 0
## 39: 18 post_ALP 0
## 40: 19 post_ALP 626
## 41: 20 post_ALP 1
## 42: 21 post_ALP 518
## 43: 22 post_ALP 367
## 44: 23 post_ALP NA
## 45: 24 post_ALP 413
## 46: 25 post_ALP NA
## id Measurement Value
ggplot(df_long, aes(x = Measurement, y = Value, fill = Measurement)) +
geom_boxplot() + labs(title = "Comparison of post_p and post_ALP", x =
"Measurement Type", y = "Value") + theme_minimal()
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
t.test(STATAData$post_p, STATAData$post_ALP) ### 별 의미는 없지만Phosphate level 정상화걸리는 시간 & ALP 정상화 되는데걸리는 시간이 유의미하게 차이가 있더라.
##
## Welch Two Sample t-test
##
## data: STATAData$post_p and STATAData$post_ALP
## t = -3.1572, df = 29.473, p-value = 0.00366
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -309.30920 -66.21461
## sample estimates:
## mean of x mean of y
## 49.09524 236.85714
dateData <- import_list("/Users/Andy/Desktop/Normalization_Date.xlsx")
normalDate <-dateData$Sheet1
head(normalDate)
## ID Date Phosphate FirstOP
## 1 1 2011-07-07 1.0 2011-07-07
## 2 1 2011-07-08 1.9 2011-07-07
## 3 1 2011-07-08 1.0 2011-07-07
## 4 1 2011-07-09 1.0 2011-07-07
## 5 1 2011-07-10 2.0 2011-07-07
## 6 1 2011-07-11 2.0 2011-07-07
normalDate <- normalDate %>%
mutate( datedif= ceiling(difftime(Date,
FirstOP, units="weeks")) ) ### 몇 주 지났는지를 올림해서 주 수로표시했어
head(normalDate)
## ID Date Phosphate FirstOP datedif
## 1 1 2011-07-07 1.0 2011-07-07 0 weeks
## 2 1 2011-07-08 1.9 2011-07-07 1 weeks
## 3 1 2011-07-08 1.0 2011-07-07 1 weeks
## 4 1 2011-07-09 1.0 2011-07-07 1 weeks
## 5 1 2011-07-10 2.0 2011-07-07 1 weeks
## 6 1 2011-07-11 2.0 2011-07-07 1 weeks
graphData <- normalDate %>%
group_by(datedif) %>%
summarise( mean = mean(Phosphate, na.rm = TRUE), # Mean of Phosphate
totalNum = n(), #Total number of rows
ErrorSD = sd(Phosphate, na.rm = TRUE) # Standard deviation of Phosphate
)
### 일단 수술후 주 수 별로 phosphate 수치들어떻게 되는지 데이터 새로 추출해야돼.
graphData
## # A tibble: 62 × 4
## datedif mean totalNum ErrorSD
## <drtn> <dbl> <int> <dbl>
## 1 0 weeks 1.81 23 0.455
## 2 1 weeks 2.42 35 0.632
## 3 2 weeks 3.97 9 1.06
## 4 3 weeks 3.52 6 1.20
## 5 4 weeks 2.7 2 0.141
## 6 5 weeks 4.5 1 NA
## 7 6 weeks 3.5 1 NA
## 8 7 weeks 3.93 3 1.32
## 9 8 weeks 2.85 2 0.212
## 10 9 weeks 3.55 2 1.34
## # ℹ 52 more rows
##자 이제 그래프 그려보자. x 축 = weeks, y 축 =phosphate , error bars =필요! , 기준선도 필요!
ggplot(graphData, aes(x = datedif, y = mean)) +
geom_point(size = 3) + # Plot the points
geom_line(linetype ="dashed") + # Connect the points with dashed lines
geom_errorbar(aes(ymin = mean, ymax = mean + ErrorSD), # Add error barsfor the upper SD only
width = 0.2) + geom_hline(yintercept = 2.5,
linetype = "solid", color = "red") + # Add reference line at y = 2.5
scale_x_continuous(limits = c(0, 8), breaks = 0:8) + # Set x-axislimits to 8 weeks and breaks
scale_y_continuous(limits=c(0,6),breaks=0:6)
## Warning: Removed 53 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 53 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Call:
## glm(formula = Re_OP ~ location + size + sex, family = "binomial",
## data = STATAData)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -18.7494 7234.0748 -0.003 0.998
## locationFemur -0.4031 2.0717 -0.195 0.846
## locationFibula -20.8484 17730.3700 -0.001 0.999
## locationFrontal sinus 20.2838 17730.3700 0.001 0.999
## locationHumerus -1.7696 19149.3565 0.000 1.000
## locationIschium 24.9888 17730.3709 0.001 0.999
## locationMandible -19.4368 17730.3700 -0.001 0.999
## locationMaxilla -18.9476 8672.2373 -0.002 0.998
## locationSphenoid -1.7226 19149.3565 0.000 1.000
## locationSpine 39.9271 19149.3566 0.002 0.998
## locationTibia -0.8930 1.9649 -0.454 0.649
## size -0.4705 0.6175 -0.762 0.446
## sexM 19.5022 7234.0747 0.003 0.998
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25.782 on 21 degrees of freedom
## Residual deviance: 10.474 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 36.474
##
## Number of Fisher Scoring iterations: 19