df = read.csv("C:\\Thach\\VN trips\\2024_4Dec\\SIS Can Tho\\Datasets\\Pre-Post study one group.csv")
head(df)
## EmpID Before After
## 1 26 43 66
## 2 27 58 74
## 3 28 52 62
## 4 29 47 84
## 5 30 43 78
## 6 31 50 73
library(lessR)
## Warning: package 'lessR' was built under R version 4.3.3
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
ttest(Before, After, paired = TRUE, data = df)
##
##
## ------ Describe ------
##
## Difference: n.miss = 0, n = 25, mean = 19.640, sd = 11.597
##
##
## ------ Normality Assumption ------
##
## Null hypothesis is a normal distribution of Difference.
## Shapiro-Wilk normality test: W = 0.9687, p-value = 0.613
##
##
## ------ Infer ------
##
## t-cutoff for 95% range of variation: tcut = 2.064
## Standard Error of Mean: SE = 2.319
##
## Hypothesized Value H0: mu = 0
## Hypothesis Test of Mean: t-value = 8.468, df = 24, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 4.787
## 95% Confidence Interval for Mean: 14.853 to 24.427
##
##
## ------ Effect Size ------
##
## Distance of sample mean from hypothesized: 19.640
## Standardized Distance, Cohen's d: 1.694
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for 6.941
df2 = read.csv("C:\\Thach\\VN trips\\2024_4Dec\\SIS Can Tho\\Datasets\\Pre-Post test dataset.csv")
head(df2)
## EmpID Treatment Before After
## 1 26 New 41 66
## 2 27 New 56 74
## 3 28 New 50 62
## 4 29 New 45 84
## 5 30 New 41 78
## 6 31 New 48 73
df2$Diff = df2$After - df2$Before
ttest(Diff ~ Treatment, data = df2)
##
## Compare Diff across Treatment with levels New and Old
## Grouping Variable: Treatment
## Response Variable: Diff
##
##
## ------ Describe ------
##
## Diff for Treatment New: n.miss = 0, n = 25, mean = 22.040, sd = 11.238
## Diff for Treatment Old: n.miss = 0, n = 25, mean = 13.280, sd = 12.785
##
## Mean Difference of Diff: 8.760
##
## Weighted Average Standard Deviation: 12.036
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of Diff.
## Group New Shapiro-Wilk normality test: W = 0.964, p-value = 0.502
## Group Old Shapiro-Wilk normality test: W = 0.954, p-value = 0.309
##
## Null hypothesis is equal variances of Diff, homogeneous.
## Variance Ratio test: F = 163.460/126.290 = 1.294, df = 24;24, p-value = 0.532
## Levene's test, Brown-Forsythe: t = -0.983, df = 48, p-value = 0.331
##
##
## ------ Infer ------
##
## --- Assume equal population variances of Diff for each Treatment
##
## t-cutoff for 95% range of variation: tcut = 2.011
## Standard Error of Mean Difference: SE = 3.404
##
## Hypothesis Test of 0 Mean Diff: t-value = 2.573, df = 48, p-value = 0.013
##
## Margin of Error for 95% Confidence Level: 6.845
## 95% Confidence Interval for Mean Difference: 1.915 to 15.605
##
##
## --- Do not assume equal population variances of Diff for each Treatment
##
## t-cutoff: tcut = 2.011
## Standard Error of Mean Difference: SE = 3.404
##
## Hypothesis Test of 0 Mean Diff: t = 2.573, df = 47.223, p-value = 0.013
##
## Margin of Error for 95% Confidence Level: 6.848
## 95% Confidence Interval for Mean Difference: 1.912 to 15.608
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of Diff for each Treatment
##
## Standardized Mean Difference of Diff, Cohen's d: 0.728
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for Treatment New: 6.726
## Density bandwidth for Treatment Old: 7.655
reg(After ~ Before + Treatment + Before*Treatment, data = df2)
##
## >>> Treatment is not numeric. Converted to indicator variables.
## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## reg(After ~ Before + Treatment + Before * Treatment, data=df2, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: df2
##
## Response Variable: After
## Predictor Variable 1: Before
## Predictor Variable 2: TreatmentOld
## Predictor Variable 3: Before.TreatmentOld
##
## Number of cases (rows) of data: 50
## Number of cases retained for analysis: 50
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) 90.468 12.149 7.446 0.000 66.012 114.923
## Before -0.360 0.239 -1.503 0.140 -0.842 0.122
## TreatmentOld -9.181 16.948 -0.542 0.591 -43.295 24.933
## Before.TreatmentOld -0.056 0.342 -0.163 0.871 -0.744 0.632
##
## Standard deviation of After: 9.795
##
## Standard deviation of residuals: 7.880 for df=46
## 95% range of residuals: 31.723 = 2 * (2.013 * 7.880)
##
## R-squared: 0.392 Adjusted R-squared: 0.353 PRESS R-squared: NA
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 9.901 df: 3 and 46 p-value: 0.000
##
## -- Analysis of Variance from Type II Sums of Squares
##
## df Sum Sq Mean Sq F-value p-value
## Before 1 319.163 319.163 5.249 0.026
## TreatmentOld 1 1724.303 1724.303 27.769 0.000
## Before.TreatmentOld 1 1.655 1.655 0.027 0.871
## Residuals 46 2856.382 62.095
##
## -- Test of Interaction
##
## Before:Treatment df: 1 df resid: 46 SS: 1.655 F: 0.027 p-value: 0.871
##
## -- Assume parallel lines, no interaction of Treatment with Before
##
## Level New: y^_After = 90.468 + -0.360(x_Before)
## Level Old: y^_After = 81.287 + -0.360(x_Before)
##
## -- Visualize Separately Computed Regression Lines
##
## Plot(Before, After, by=Treatment, fit="lm")
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## After Before TreatmentOld Before.TreatmentOld
## After 1.00 -0.16 -0.57 -0.60
## Before -0.16 1.00 -0.17 -0.04
## TreatmentOld -0.57 -0.17 1.00 0.98
## Before.TreatmentOld -0.60 -0.04 0.98 1.00
##
## Tolerance VIF
## Before 0.494 2.024
## TreatmentOld 0.017 57.821
## Before.TreatmentOld 0.018 56.198
##
## Before TreatmentOld Before.TreatmentOld R2adj X's
## 1 1 0 0.366 2
## 1 0 1 0.362 2
## 1 1 1 0.353 3
## 0 0 1 0.342 1
## 0 1 1 0.335 2
## 0 1 0 0.310 1
## 1 0 0 0.005 1
##
## [based on Thomas Lumley's leaps function from the leaps package]
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 20, out of 50 rows of data, or do n_res_rows="all"]
## --------------------------------------------------------------------------------------
## Before TreatmentOld Before.TreatmentOld After fitted resid rstdnt dffits cooks
## 40 43 1 43 42 63.415 -21.415 -3.053 -0.800 0.136
## 19 64 0 0 77 67.437 9.563 1.381 0.718 0.126
## 50 60 1 60 46 56.349 -10.349 -1.466 -0.679 0.113
## 34 51 1 51 79 60.090 18.910 2.611 0.589 0.077
## 7 64 0 0 60 67.437 -7.437 -1.065 -0.554 0.076
## 32 44 1 44 79 62.999 16.001 2.172 0.527 0.064
## 1 41 0 0 66 75.714 -9.714 -1.325 -0.490 0.059
## 8 46 0 0 61 73.915 -12.915 -1.724 -0.425 0.043
## 26 51 1 51 74 60.090 13.910 1.857 0.419 0.042
## 46 47 1 47 49 61.752 -12.752 -1.685 -0.349 0.029
## 4 45 0 0 84 74.274 9.726 1.286 0.342 0.029
## 18 43 0 0 83 74.994 8.006 1.066 0.334 0.028
## 17 53 0 0 61 71.396 -10.396 -1.364 -0.302 0.022
## 16 43 0 0 82 74.994 7.006 0.930 0.292 0.021
## 10 47 0 0 83 73.555 9.445 1.237 0.284 0.020
## 3 50 0 0 62 72.475 -10.475 -1.370 -0.280 0.019
## 45 51 1 51 51 60.090 -9.090 -1.188 -0.268 0.018
## 30 42 1 42 70 63.830 6.170 0.811 0.231 0.013
## 36 50 1 50 53 60.505 -7.505 -0.973 -0.208 0.011
## 39 50 1 50 53 60.505 -7.505 -0.973 -0.208 0.011
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## [to see all intervals add n_pred_rows="all"]
## ----------------------------------------------
##
## Before TreatmentOld Before.TreatmentOld After pred s_pred pi.lwr pi.upr width
## 50 60 1 60 46 56.349 8.549 39.142 73.557 34.415
## 35 59 1 59 59 56.765 8.469 39.718 73.811 34.093
## 47 58 1 58 60 57.180 8.395 40.282 74.078 33.796
## ...
## 48 49 1 49 62 60.921 8.040 44.738 77.104 32.365
## 41 48 1 48 60 61.337 8.036 45.161 77.512 32.352
## 27 47 1 47 65 61.752 8.040 45.568 77.936 32.368
## ...
## 44 36 1 36 67 66.324 8.555 49.103 83.545 34.442
## 7 64 0 0 60 67.437 8.678 49.970 84.905 34.935
## 19 64 0 0 77 67.437 8.678 49.970 84.905 34.935
##
## -------------------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## Plot 3: Scatterplot and Least-Squares Lines
## -------------------------------------------
df3 = read.csv("C:\\Thach\\VN trips\\2024_4Dec\\SIS Can Tho\\Datasets\\Orthodont.csv")
head(df3)
## id sex age distance
## 1 M01 Male 8 26.0
## 2 M01 Male 10 25.0
## 3 M01 Male 12 29.0
## 4 M01 Male 14 31.0
## 5 M02 Male 8 21.5
## 6 M02 Male 10 22.5
library(ggplot2)
ggplot(data = df3, aes(x = age, y = distance, group = id, col = id)) + geom_point() + geom_line()
m.1 = lm(distance ~ age, data = df3)
summary(m.1)
##
## Call:
## lm(formula = distance ~ age, data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5037 -1.5778 -0.1833 1.3519 6.3167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.7611 1.2256 13.676 < 0.0000000000000002 ***
## age 0.6602 0.1092 6.047 0.0000000225 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.537 on 106 degrees of freedom
## Multiple R-squared: 0.2565, Adjusted R-squared: 0.2495
## F-statistic: 36.56 on 1 and 106 DF, p-value: 0.00000002248
library(lme4)
## Loading required package: Matrix
m.2 = lmer(distance ~ 1 + age + (1 + age | id), data = df3)
summary(m.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ 1 + age + (1 + age | id)
## Data: df3
##
## REML criterion at convergence: 442.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2229 -0.4938 0.0073 0.4721 3.9161
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 5.41657 2.3274
## age 0.05128 0.2264 -0.61
## Residual 1.71616 1.3100
## Number of obs: 108, groups: id, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 16.76111 0.77527 21.620
## age 0.66019 0.07126 9.265
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.848
m.3 = lmer(distance ~ 1 + age + sex + age*sex + (1 + age | id), data = df3)
summary(m.3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: distance ~ 1 + age + sex + age * sex + (1 + age | id)
## Data: df3
##
## REML criterion at convergence: 432.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1694 -0.3862 0.0070 0.4454 3.8490
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 5.77449 2.4030
## age 0.03245 0.1801 -0.67
## Residual 1.71663 1.3102
## Number of obs: 108, groups: id, 27
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 17.3727 1.2281 14.147
## age 0.4795 0.1037 4.625
## sexMale -1.0321 1.5953 -0.647
## age:sexMale 0.3048 0.1347 2.263
##
## Correlation of Fixed Effects:
## (Intr) age sexMal
## age -0.880
## sexMale -0.770 0.678
## age:sexMale 0.678 -0.770 -0.880
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following objects are masked from 'package:lessR':
##
## bc, recode, sp
Anova(m.3, type = 3)
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: distance
## Chisq Df Pr(>Chisq)
## (Intercept) 200.1256 1 < 0.00000000000000022 ***
## age 21.3860 1 0.000003755 ***
## sex 0.4186 1 0.51765
## age:sex 5.1208 1 0.02364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1