library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(table1)
## Warning: package 'table1' was built under R version 4.5.2
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lessR)
## Warning: package 'lessR' was built under R version 4.5.2
##
## lessR 4.5 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is the default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data, graphics,
## testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation to pivot tables.
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including modern time series forecasting
## and many, new Plotly interactive visualizations output. Most
## visualization functions are now reorganized to three functions:
## Chart(): type="bar", "pie", "radar", "bubble", "treemap", "icicle"
## X(): type="histogram", "density", "vbs" and more
## XY(): type="scatter" for a scatterplot, or "contour", "smooth"
## Most previous function calls still work, such as:
## BarChart(), Histogram, and Plot().
## Enter: news(package="lessR"), or ?Chart, ?X, or ?XY
## There is also Flows() for Sankey flow diagrams, see ?Flows
##
## Interactive data analysis for constructing visualizations.
## Enter: interact()
##
##
## Attaching package: 'lessR'
##
## The following objects are masked from 'package:dplyr':
##
## order_by, recode, rename
##
## The following object is masked from 'package:table1':
##
## label
th2 <- read.csv("Bone data.csv",header = T)
th2 <- th2 %>%
mutate_if(is.character,factor) %>%
mutate(id=as.factor(id),
prior.fx =as.factor(prior.fx ),
smoking=as.factor(smoking),
fx = as.factor(fx))
ggplot(th2, aes(x = fnbmd)) +
geom_histogram(
bins = 30,
color = "white",
fill = "blue"
) +
labs(
title = "Distribution of Femoral Neck Bone Mineral Density (fnbmd)",
x = "Femoral Neck BMD (g/cm²)",
y = "Frequency"
) +
theme_minimal()
## Warning: Removed 40 rows containing non-finite outside the scale range
## (`stat_bin()`).
ttest(
fnbmd ~ sex,
data = th2
)
##
## Compare fnbmd across sex with levels Male and Female
## Grouping Variable: sex
## Response Variable: fnbmd
##
##
## ------ Describe ------
##
## fnbmd for sex Male: n.miss = 23, n = 822, mean = 0.910, sd = 0.153
## fnbmd for sex Female: n.miss = 17, n = 1300, mean = 0.778, sd = 0.132
##
## Mean Difference of fnbmd: 0.132
##
## Weighted Average Standard Deviation: 0.141
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
##
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test: F = 0.023/0.018 = 1.336, df = 821;1299, p-value = 0.000
## Levene's test, Brown-Forsythe: t = 3.449, df = 2120, p-value = 0.001
##
##
## ------ Infer ------
##
## --- Assume equal population variances of fnbmd for each sex
##
## t-cutoff for 95% range of variation: tcut = 1.961
## Standard Error of Mean Difference: SE = 0.006
##
## Hypothesis Test of 0 Mean Diff: t-value = 21.080, df = 2120, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 0.012
## 95% Confidence Interval for Mean Difference: 0.120 to 0.144
##
##
## --- Do not assume equal population variances of fnbmd for each sex
##
## t-cutoff: tcut = 1.961
## Standard Error of Mean Difference: SE = 0.006
##
## Hypothesis Test of 0 Mean Diff: t = 20.407, df = 1560.981, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 0.013
## 95% Confidence Interval for Mean Difference: 0.119 to 0.145
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of fnbmd for each sex
##
## Standardized Mean Difference of fnbmd, Cohen's d: 0.939
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034
# Nhập data
placebo=c(105, 119, 100, 97, 96, 101, 94, 95, 98)
coffee=c(96, 99, 94, 89, 96, 93, 88, 105, 88)
th3 <- data.frame(placebo,coffee)
ttest(placebo,coffee)
##
## Compare Y across X with levels Group1 and Group2
## Grouping Variable: X
## Response Variable: Y
##
##
## ------ Describe ------
##
## Y for X Group1: n.miss = 0, n = 9, mean = 100.556, sd = 7.699
## Y for X Group2: n.miss = 0, n = 9, mean = 94.222, sd = 5.608
##
## Mean Difference of Y: 6.333
##
## Weighted Average Standard Deviation: 6.735
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of Y.
## Group Group1 Shapiro-Wilk normality test: W = 0.780, p-value = 0.012
## Group Group2 Shapiro-Wilk normality test: W = 0.924, p-value = 0.427
##
## Null hypothesis is equal variances of Y, homogeneous.
## Variance Ratio test: F = 59.278/31.444 = 1.885, df = 8;8, p-value = 0.389
## Levene's test, Brown-Forsythe: t = 0.230, df = 16, p-value = 0.821
##
##
## ------ Infer ------
##
## --- Assume equal population variances of Y for each X
##
## t-cutoff for 95% range of variation: tcut = 2.120
## Standard Error of Mean Difference: SE = 3.175
##
## Hypothesis Test of 0 Mean Diff: t-value = 1.995, df = 16, p-value = 0.063
##
## Margin of Error for 95% Confidence Level: 6.731
## 95% Confidence Interval for Mean Difference: -0.397 to 13.064
##
##
## --- Do not assume equal population variances of Y for each X
##
## t-cutoff: tcut = 2.136
## Standard Error of Mean Difference: SE = 3.175
##
## Hypothesis Test of 0 Mean Diff: t = 1.995, df = 14.624, p-value = 0.065
##
## Margin of Error for 95% Confidence Level: 6.782
## 95% Confidence Interval for Mean Difference: -0.449 to 13.116
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of Y for each X
##
## Standardized Mean Difference of Y, Cohen's d: 0.940
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for X Group1: 5.642
## Density bandwidth for X Group2: 4.112
library(boot)
## Warning: package 'boot' was built under R version 4.5.2
library(simpleboot)
## Warning: package 'simpleboot' was built under R version 4.5.2
## Simple Bootstrap Routines (1.1-8)
set.seed(123)
boot_result <- two.boot(placebo,coffee, FUN = mean, R=10000)
boot.ci(boot_result, type =c("perc","bca","norm","basic"))
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_result, type = c("perc", "bca", "norm",
## "basic"))
##
## Intervals :
## Level Normal Basic
## 95% ( 0.383, 12.188 ) ( 0.111, 11.778 )
##
## Level Percentile BCa
## 95% ( 0.889, 12.556 ) ( 0.778, 12.444 )
## Calculations and Intervals on Original Scale
# perc: percentile
# bca: dùng khi cỡ mẫu nhỏ
library(table1)
table1(~fx|sex, data = th2)
| Female (N=1317) |
Male (N=845) |
Overall (N=2162) |
|
|---|---|---|---|
| fx | |||
| 0 | 916 (69.6%) | 701 (83.0%) | 1617 (74.8%) |
| 1 | 401 (30.4%) | 144 (17.0%) | 545 (25.2%) |