#Việc 1. Phân tích trước-sau cho 1 nhóm
#1.1 Đọc dữ liệu Pre-Post study one group.csv
# Load the one-group pre/post study data (columns EmpID, Before, After) and
# preview the first rows.
df <- read.csv(
  file = "E:\\HOPT\\PTDLR\\DATA\\Pre-Post study one group.csv"
)
head(x = df)
## EmpID Before After
## 1 26 43 66
## 2 27 58 74
## 3 28 52 62
## 4 29 47 84
## 5 30 43 78
## 6 31 50 73
#1.2 Đánh giá hiệu quả của can thiệp bằng kiểm định t bắt cặp (paired t-test). Diễn giải kết quả
library(lessR)
##
## lessR 4.4.2 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:base':
##
## sort_by
# Paired t-test of the intervention effect within the single group.
# In the output below the difference is the second variable minus the first,
# so this ordering reports After - Before (a positive mean change).
ttest(
  x = Before,
  y = After,
  data = df,
  paired = TRUE
)
##
##
## ------ Describe ------
##
## Difference: n.miss = 0, n = 25, mean = 19.640, sd = 11.597
##
##
## ------ Normality Assumption ------
##
## Null hypothesis is a normal distribution of Difference.
## Shapiro-Wilk normality test: W = 0.9687, p-value = 0.613
##
##
## ------ Infer ------
##
## t-cutoff for 95% range of variation: tcut = 2.064
## Standard Error of Mean: SE = 2.319
##
## Hypothesized Value H0: mu = 0
## Hypothesis Test of Mean: t-value = 8.468, df = 24, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 4.787
## 95% Confidence Interval for Mean: 14.853 to 24.427
##
##
## ------ Effect Size ------
##
## Distance of sample mean from hypothesized: 19.640
## Standardized Distance, Cohen's d: 1.694
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for 6.941


# Same paired t-test with the two variables swapped: the difference becomes
# Before - After, so the mean, t-value and confidence limits change sign while
# their magnitudes stay the same.
ttest(
  x = After,
  y = Before,
  data = df,
  paired = TRUE
)
##
##
## ------ Describe ------
##
## Difference: n.miss = 0, n = 25, mean = -19.640, sd = 11.597
##
##
## ------ Normality Assumption ------
##
## Null hypothesis is a normal distribution of Difference.
## Shapiro-Wilk normality test: W = 0.9687, p-value = 0.613
##
##
## ------ Infer ------
##
## t-cutoff for 95% range of variation: tcut = 2.064
## Standard Error of Mean: SE = 2.319
##
## Hypothesized Value H0: mu = 0
## Hypothesis Test of Mean: t-value = -8.468, df = 24, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 4.787
## 95% Confidence Interval for Mean: -24.427 to -14.853
##
##
## ------ Effect Size ------
##
## Distance of sample mean from hypothesized: -19.640
## Standardized Distance, Cohen's d: 1.694
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for 6.941


#Việc 2. Phân tích trước-sau cho 2 nhóm
#2.1 Đọc dữ liệu Pre-Post test dataset.csv
# Load the two-group pre/post data (columns EmpID, Treatment, Before, After)
# and preview the first rows.
df1 <- read.csv(
  file = "E:\\HOPT\\PTDLR\\DATA\\Pre-Post test dataset.csv"
)
head(x = df1)
## EmpID Treatment Before After
## 1 26 New 41 66
## 2 27 New 56 74
## 3 28 New 50 62
## 4 29 New 45 84
## 5 30 New 41 78
## 6 31 New 48 73
#2.2 So sánh hiệu quả của 2 can thiệp bằng kiểm định t bắt cặp. Diễn giải kết quả. Bạn nhận xét gì về kết quả này.
# Per-employee change score; a positive value means the score rose after the
# intervention.
df1$Diff <- df1$After - df1$Before
# Compare the mean change score between the Treatment levels (New vs Old).
# The formula interface runs an independent-groups t-test on Diff, not a
# paired test: pairing is already accounted for by taking After - Before.
ttest(Diff ~ Treatment, data = df1)
##
## Compare Diff across Treatment with levels New and Old
## Grouping Variable: Treatment
## Response Variable: Diff
##
##
## ------ Describe ------
##
## Diff for Treatment New: n.miss = 0, n = 25, mean = 22.040, sd = 11.238
## Diff for Treatment Old: n.miss = 0, n = 25, mean = 13.280, sd = 12.785
##
## Mean Difference of Diff: 8.760
##
## Weighted Average Standard Deviation: 12.036
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of Diff.
## Group New Shapiro-Wilk normality test: W = 0.964, p-value = 0.502
## Group Old Shapiro-Wilk normality test: W = 0.954, p-value = 0.309
##
## Null hypothesis is equal variances of Diff, homogeneous.
## Variance Ratio test: F = 163.460/126.290 = 1.294, df = 24;24, p-value = 0.532
## Levene's test, Brown-Forsythe: t = -0.983, df = 48, p-value = 0.331
##
##
## ------ Infer ------
##
## --- Assume equal population variances of Diff for each Treatment
##
## t-cutoff for 95% range of variation: tcut = 2.011
## Standard Error of Mean Difference: SE = 3.404
##
## Hypothesis Test of 0 Mean Diff: t-value = 2.573, df = 48, p-value = 0.013
##
## Margin of Error for 95% Confidence Level: 6.845
## 95% Confidence Interval for Mean Difference: 1.915 to 15.605
##
##
## --- Do not assume equal population variances of Diff for each Treatment
##
## t-cutoff: tcut = 2.011
## Standard Error of Mean Difference: SE = 3.404
##
## Hypothesis Test of 0 Mean Diff: t = 2.573, df = 47.223, p-value = 0.013
##
## Margin of Error for 95% Confidence Level: 6.848
## 95% Confidence Interval for Mean Difference: 1.912 to 15.608
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of Diff for each Treatment
##
## Standardized Mean Difference of Diff, Cohen's d: 0.728
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for Treatment New: 6.726
## Density bandwidth for Treatment Old: 7.655

#Nhóm “New”:
# Paired t-test restricted to the "New" treatment group.
# After is listed first, so the reported difference is Before - After; this is
# why the mean and t-value in the output below are negative.
ttest(
  x = After,
  y = Before,
  data = subset(df1, Treatment == "New"),
  paired = TRUE
)
##
##
## ------ Describe ------
##
## Difference: n.miss = 0, n = 25, mean = -22.040, sd = 11.238
##
##
## ------ Normality Assumption ------
##
## Null hypothesis is a normal distribution of Difference.
## Shapiro-Wilk normality test: W = 0.9641, p-value = 0.502
##
##
## ------ Infer ------
##
## t-cutoff for 95% range of variation: tcut = 2.064
## Standard Error of Mean: SE = 2.248
##
## Hypothesized Value H0: mu = 0
## Hypothesis Test of Mean: t-value = -9.806, df = 24, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 4.639
## 95% Confidence Interval for Mean: -26.679 to -17.401
##
##
## ------ Effect Size ------
##
## Distance of sample mean from hypothesized: -22.040
## Standardized Distance, Cohen's d: 1.961
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for 6.726


#Nhóm “Old”:
# Paired t-test restricted to the "Old" treatment group.
# After is listed first, so the reported difference is Before - After; this is
# why the mean and t-value in the output below are negative.
ttest(
  x = After,
  y = Before,
  data = subset(df1, Treatment == "Old"),
  paired = TRUE
)
##
##
## ------ Describe ------
##
## Difference: n.miss = 0, n = 25, mean = -13.280, sd = 12.785
##
##
## ------ Normality Assumption ------
##
## Null hypothesis is a normal distribution of Difference.
## Shapiro-Wilk normality test: W = 0.9541, p-value = 0.309
##
##
## ------ Infer ------
##
## t-cutoff for 95% range of variation: tcut = 2.064
## Standard Error of Mean: SE = 2.557
##
## Hypothesized Value H0: mu = 0
## Hypothesis Test of Mean: t-value = -5.194, df = 24, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 5.277
## 95% Confidence Interval for Mean: -18.557 to -8.003
##
##
## ------ Effect Size ------
##
## Distance of sample mean from hypothesized: -13.280
## Standardized Distance, Cohen's d: 1.039
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for 7.655


#Diễn giải kết quả:
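#Nhóm New: Hiệu số trung bình (After - Before): +22.04; t-statistic: 9.81; p-value: < 0.000000001 (rất nhỏ)
#Lưu ý về dấu: kết quả ttest(After, Before) ở trên hiển thị giá trị âm (-22.04; t = -9.806) vì lessR lấy biến thứ hai trừ biến thứ nhất (Before - After); độ lớn không đổi, chỉ đổi dấu (tương tự cho nhóm Old).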
#Diễn giải: Can thiệp New giúp cải thiện đáng kể giá trị sau can thiệp, với mức tăng trung bình 22.04 đơn vị. Kết quả này có ý nghĩa thống kê rất cao (p < 0.001), cho thấy phương pháp New có hiệu quả rõ rệt.
#Nhóm Old: Hiệu số trung bình (After - Before): +13.28; t-statistic: 5.19; p-value: 0.000025
#Diễn giải: Can thiệp Old cũng cho thấy sự cải thiện có ý nghĩa thống kê (p < 0.001), với mức tăng trung bình 13.28 đơn vị. Tuy nhiên, mức cải thiện thấp hơn đáng kể so với nhóm New.
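#Kết luận: Cả hai phương pháp New và Old đều có hiệu quả cải thiện chỉ số sau can thiệp, nhưng phương pháp New tỏ ra vượt trội hơn về mức độ cải thiện trung bình (22.04 so với 13.28).
#So sánh trực tiếp hai nhóm bằng kiểm định t độc lập trên hiệu số Diff (ttest(Diff ~ Treatment) ở trên): chênh lệch trung bình giữa New và Old là 8.76 đơn vị (KTC 95%: 1.92 đến 15.61; t = 2.573, df = 48, p = 0.013; Cohen's d = 0.728), tức là mức cải thiện của nhóm New cao hơn nhóm Old có ý nghĩa thống kê.
#Nhận xét: hai kiểm định t bắt cặp riêng cho từng nhóm chỉ cho biết mỗi can thiệp có hiệu quả hay không; việc so sánh p-value của hai kiểm định này không phải là bằng chứng trực tiếp rằng New tốt hơn Old, nên cần dựa vào kiểm định giữa hai nhóm nêu trên.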
#2.3 So sánh hiệu quả của 2 can thiệp bằng phân tích hiệp biến (ANCOVA). Diễn giải kết quả.
# Regress After on the baseline score (Before), the treatment group, and their
# interaction. The interaction term tests whether the Before slope differs
# between the groups. Treatment is not numeric, so lessR converts it to
# indicator variable(s), as reported in the message in the output below.
# NOTE(review): in R formula syntax `Before*Treatment` already expands to both
# main effects plus the interaction, so the explicit main effects look
# redundant -- confirm before simplifying, since lessR post-processes the
# formula.
reg(After ~ Before + Treatment + Before*Treatment, data = df1)
##
## >>> Treatment is not numeric. Converted to indicator variables.



## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## reg(After ~ Before + Treatment + Before * Treatment, data=df1, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: df1
##
## Response Variable: After
## Predictor Variable 1: Before
## Predictor Variable 2: TreatmentOld
## Predictor Variable 3: Before.TreatmentOld
##
## Number of cases (rows) of data: 50
## Number of cases retained for analysis: 50
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) 90.468 12.149 7.446 0.000 66.012 114.923
## Before -0.360 0.239 -1.503 0.140 -0.842 0.122
## TreatmentOld -9.181 16.948 -0.542 0.591 -43.295 24.933
## Before.TreatmentOld -0.056 0.342 -0.163 0.871 -0.744 0.632
##
## Standard deviation of After: 9.7945
##
## Standard deviation of residuals: 7.8801 for df=46
## 95% range of residuals: 31.7235 = 2 * (2.013 * 7.8801)
##
## R-squared: 0.392 Adjusted R-squared: 0.353 PRESS R-squared: NA
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 9.901 df: 3 and 46 p-value: 0.000
##
## -- Analysis of Variance from Type II Sums of Squares
##
## df Sum Sq Mean Sq F-value p-value
## Before 1 319.163 319.163 5.249 0.026
## TreatmentOld 1 1724.303 1724.303 27.769 0.000
## Before.TreatmentOld 1 1.655 1.655 0.027 0.871
## Residuals 46 2856.382 62.095
##
## -- Test of Interaction
##
## Before:Treatment df: 1 df resid: 46 SS: 1.655 F: 0.027 p-value: 0.871
##
## -- Assume parallel lines, no interaction of Treatment with Before
##
## Level New: y^_After = 90.468 + -0.360(x_Before)
## Level Old: y^_After = 81.287 + -0.360(x_Before)
##
## -- Visualize Separately Computed Regression Lines
##
## Plot(Before, After, by=Treatment, fit="lm")
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## After Before TreatmentOld Before.TreatmentOld
## After 1.00 -0.16 -0.57 -0.60
## Before -0.16 1.00 -0.17 -0.04
## TreatmentOld -0.57 -0.17 1.00 0.98
## Before.TreatmentOld -0.60 -0.04 0.98 1.00
##
## Tolerance VIF
## Before 0.494 2.024
## TreatmentOld 0.017 57.821
## Before.TreatmentOld 0.018 56.198
##
## Before TreatmentOld Before.TreatmentOld R2adj X's
## 1 1 0 0.366 2
## 1 0 1 0.362 2
## 1 1 1 0.353 3
## 0 0 1 0.342 1
## 0 1 1 0.335 2
## 0 1 0 0.310 1
## 1 0 0 0.005 1
##
## [based on Thomas Lumley's leaps function from the leaps package]
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 20, out of 50 rows of data, or do n_res_rows="all"]
## --------------------------------------------------------------------------------------
## Before TreatmentOld Before.TreatmentOld After fitted resid rstdnt dffits cooks
## 40 43 1 43 42 63.415 -21.415 -3.053 -0.800 0.136
## 19 64 0 0 77 67.437 9.563 1.381 0.718 0.126
## 50 60 1 60 46 56.349 -10.349 -1.466 -0.679 0.113
## 34 51 1 51 79 60.090 18.910 2.611 0.589 0.077
## 7 64 0 0 60 67.437 -7.437 -1.065 -0.554 0.076
## 32 44 1 44 79 62.999 16.001 2.172 0.527 0.064
## 1 41 0 0 66 75.714 -9.714 -1.325 -0.490 0.059
## 8 46 0 0 61 73.915 -12.915 -1.724 -0.425 0.043
## 26 51 1 51 74 60.090 13.910 1.857 0.419 0.042
## 46 47 1 47 49 61.752 -12.752 -1.685 -0.349 0.029
## 4 45 0 0 84 74.274 9.726 1.286 0.342 0.029
## 18 43 0 0 83 74.994 8.006 1.066 0.334 0.028
## 17 53 0 0 61 71.396 -10.396 -1.364 -0.302 0.022
## 16 43 0 0 82 74.994 7.006 0.930 0.292 0.021
## 10 47 0 0 83 73.555 9.445 1.237 0.284 0.020
## 3 50 0 0 62 72.475 -10.475 -1.370 -0.280 0.019
## 45 51 1 51 51 60.090 -9.090 -1.188 -0.268 0.018
## 30 42 1 42 70 63.830 6.170 0.811 0.231 0.013
## 36 50 1 50 53 60.505 -7.505 -0.973 -0.208 0.011
## 39 50 1 50 53 60.505 -7.505 -0.973 -0.208 0.011
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## [to see all intervals add n_pred_rows="all"]
## ----------------------------------------------
##
## Before TreatmentOld Before.TreatmentOld After pred s_pred pi.lwr pi.upr width
## 50 60 1 60 46 56.349 8.549 39.142 73.557 34.415
## 35 59 1 59 59 56.765 8.469 39.718 73.811 34.093
## 47 58 1 58 60 57.180 8.395 40.282 74.078 33.796
## ...
## 48 49 1 49 62 60.921 8.040 44.738 77.104 32.365
## 41 48 1 48 60 61.337 8.036 45.161 77.512 32.352
## 27 47 1 47 65 61.752 8.040 45.568 77.936 32.368
## ...
## 44 36 1 36 67 66.324 8.555 49.103 83.545 34.442
## 7 64 0 0 60 67.437 8.678 49.970 84.905 34.935
## 19 64 0 0 77 67.437 8.678 49.970 84.905 34.935
##
## -------------------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## Plot 3: Scatterplot and Least-Squares Lines
## -------------------------------------------