A = c(8, 9, 11, 4, 7, 8, 5)
B = c(7, 17, 10, 14, 12, 24, 11, 22)
C = c(28, 21, 26, 11, 24, 19)
D = c(26, 16, 13, 12, 9, 10, 11, 17, 15)
weight = c(A, B, C, D)
group = c(rep("A", 7), rep("B", 8), rep("C", 6), rep("D", 9))
data = data.frame(weight, group)
data
## weight group
## 1 8 A
## 2 9 A
## 3 11 A
## 4 4 A
## 5 7 A
## 6 8 A
## 7 5 A
## 8 7 B
## 9 17 B
## 10 10 B
## 11 14 B
## 12 12 B
## 13 24 B
## 14 11 B
## 15 22 B
## 16 28 C
## 17 21 C
## 18 26 C
## 19 11 C
## 20 24 C
## 21 19 C
## 22 26 D
## 23 16 D
## 24 13 D
## 25 12 D
## 26 9 D
## 27 10 D
## 28 11 D
## 29 17 D
## 30 15 D
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~ weight | group, data = data, render.continuous = c(. = "Mean (SD)", . = "Median [Q1, Q3]"))
| A (N=7) |
B (N=8) |
C (N=6) |
D (N=9) |
Overall (N=30) |
|
|---|---|---|---|---|---|
| weight | |||||
| Mean (SD) | 7.43 (2.37) | 14.6 (5.95) | 21.5 (6.09) | 14.3 (5.15) | 14.2 (6.75) |
| Median [Q1, Q3] | 8.00 [6.00, 8.50] | 13.0 [10.8, 18.3] | 22.5 [19.5, 25.5] | 13.0 [11.0, 16.0] | 12.0 [9.25, 18.5] |
av = aov(weight ~ group, data = data)
summary(av)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 3 642.3 214.09 8.197 0.000528 ***
## Residuals 26 679.1 26.12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(av)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ group, data = data)
##
## $group
## diff lwr upr p adj
## B-A 7.1964286 -0.05969765 14.4525548 0.0525014
## C-A 14.0714286 6.27132726 21.8715299 0.0002134
## D-A 6.9047619 -0.16073856 13.9702624 0.0571911
## C-B 6.8750000 -0.69675602 14.4467560 0.0850381
## D-B -0.2916667 -7.10424368 6.5209103 0.9994049
## D-C -7.1666667 -14.55594392 0.2226106 0.0597131
plot(TukeyHSD(av), las = 2, col = "blue")
df = read.csv("E:\\Khoa XD\\Nam 2025-2026\\Tap huan NCKH\\Demo.csv")
dim(df)
## [1] 1217 7
head(df)
## X id age gender weight height pcfat
## 1 1 1 53 F 49 150 37.3
## 2 2 2 65 M 52 165 16.8
## 3 3 3 64 F 57 157 34.0
## 4 4 4 56 F 53 156 33.8
## 5 5 5 54 M 51 160 14.8
## 6 6 6 52 F 47 153 32.2
library(table1)
table1(~ weight + height, data = df)
| Overall (N=1217) |
|
|---|---|
| weight | |
| Mean (SD) | 55.1 (9.40) |
| Median [Min, Max] | 54.0 [34.0, 95.0] |
| height | |
| Mean (SD) | 157 (7.98) |
| Median [Min, Max] | 155 [136, 185] |
plot(height ~ weight, data = df)
library(ggplot2)
ggplot(data = df, aes(x = weight, y = height)) + geom_point() + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
cor.test(df$weight, df$height)
##
## Pearson's product-moment correlation
##
## data: df$weight and df$height
## t = 25.984, df = 1215, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5602911 0.6326135
## sample estimates:
## cor
## 0.5976667
library(lessR)
##
## lessR 4.4.5 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
Plot(height, pcfat, data = df, fit = "lm")
##
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(height, pcfat, enhance=TRUE) # many options
## Plot(height, pcfat, fill="skyblue") # interior fill color of points
## Plot(height, pcfat, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 1217
## Sample Correlation of height and pcfat: r = -0.480
##
## Hypothesis Test of 0 Correlation: t = -19.063, df = 1215, p-value = 0.000
## 95% Confidence Interval for Correlation: -0.522 to -0.435
##
##
## Line: b0 = 99.311633 b1 = -0.432014 Linear Model MSE = 39.747926 Rsq = 0.230
##
cor.test(df$weight, df$pcfat)
##
## Pearson's product-moment correlation
##
## data: df$weight and df$pcfat
## t = 1.9745, df = 1215, p-value = 0.04855
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0003640405 0.1123913870
## sample estimates:
## cor
## 0.05655573
library(gapminder)
data(gapminder)
vn = subset(gapminder, country == "Vietnam")
library(lessR)
Plot(lifeExp, year, data = vn, xlab = "Life expectancy (years)", ylab = "Year")
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(lifeExp, year, enhance=TRUE) # many options
## Plot(lifeExp, year, fill="skyblue") # interior fill color of points
## Plot(lifeExp, year, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(lifeExp, year, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 12
## Sample Correlation of lifeExp and year: r = 0.995
##
## Hypothesis Test of 0 Correlation: t = 30.569, df = 10, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.981 to 0.999
##
m1 = lm(lifeExp ~ year, data = vn)
summary(m1)
##
## Call:
## lm(formula = lifeExp ~ year, data = vn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1884 -0.5840 0.1335 0.7396 1.7873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1271.98315 43.49240 -29.25 0.0000000000510 ***
## year 0.67162 0.02197 30.57 0.0000000000329 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.314 on 10 degrees of freedom
## Multiple R-squared: 0.9894, Adjusted R-squared: 0.9884
## F-statistic: 934.5 on 1 and 10 DF, p-value: 0.00000000003289
m2 = Regression(lifeExp ~ year, data = vn)
m2
## >>> Suggestion
## # Create an R markdown file for interpretative output with Rmd = "file_name"
## Regression(my_formula=lifeExp ~ year, data=vn, Rmd="eg")
##
##
## BACKGROUND
##
## Data Frame: vn
##
## Response Variable: lifeExp
## Predictor Variable: year
##
## Number of cases (rows) of data: 12
## Number of cases retained for analysis: 12
##
##
## BASIC ANALYSIS
##
## Estimate Std Err t-value p-value Lower 95% Upper 95%
## (Intercept) -1271.9832 43.4924 -29.246 0.000 -1368.8903 -1175.0760
## year 0.6716 0.0220 30.569 0.000 0.6227 0.7206
##
## Standard deviation of lifeExp: 12.17233
##
## Standard deviation of residuals: 1.31365 for df=10
## 95% range of residuals: 5.85399 = 2 * (2.228 * 1.31365)
##
## R-squared: 0.989 Adjusted R-squared: 0.988 PRESS R-squared: 0.984
##
## Null hypothesis of all 0 population slope coefficients:
## F-statistic: 934.455 df: 1 and 10 p-value: 0.000
##
## -- Analysis of Variance
##
## df Sum Sq Mean Sq F-value p-value
## Model 1 1612.5653 1612.5653 934.4554 0.000
## Residuals 10 17.2567 1.7257
## lifeExp 11 1629.8221 148.1656
##
##
## K-FOLD CROSS-VALIDATION
##
##
## RELATIONS AMONG THE VARIABLES
##
## lifeExp year
## lifeExp 1.00 0.99
## year 0.99 1.00
##
##
## RESIDUALS AND INFLUENCE
##
## -- Data, Fitted, Residual, Studentized Residual, Dffits, Cook's Distance
## [sorted by Cook's Distance]
## [n_res_rows = 12, out of 12 ]
## -------------------------------------------------------------
## year lifeExp fitted resid rstdnt dffits cooks
## 12 2007 74.2490 75.9489 -1.6999 -1.6742 -1.0827 0.4965
## 1 1952 40.4120 39.0101 1.4019 1.3167 0.8515 0.3377
## 5 1972 50.2540 52.4424 -2.1884 -2.0016 -0.6637 0.1694
## 9 1992 67.6620 65.8747 1.7873 1.5563 0.5937 0.1543
## 10 1997 70.6720 69.2328 1.4392 1.2327 0.5559 0.1469
## 4 1967 47.8380 49.0843 -1.2463 -1.0172 -0.3880 0.0750
## 2 1957 42.8870 42.3682 0.5188 0.4300 0.2316 0.0292
## 11 2002 73.0170 72.5908 0.4262 0.3520 0.1896 0.0197
## 3 1962 45.3630 45.7262 -0.3632 -0.2891 -0.1304 0.0094
## 7 1982 58.8160 59.1585 -0.3425 -0.2596 -0.0792 0.0035
## 8 1987 62.8200 62.5166 0.3034 0.2315 0.0768 0.0032
## 6 1977 55.7640 55.8005 -0.0365 -0.0275 -0.0084 0.0000
##
##
## PREDICTION ERROR
##
## -- Data, Predicted, Standard Error of Prediction, 95% Prediction Intervals
## [sorted by lower bound of prediction interval]
## ----------------------------------------------
##
## year lifeExp pred s_pred pi.lwr pi.upr width
## 1 1952 40.4120 39.0101 1.4948 35.6794 42.3408 6.6614
## 2 1957 42.8870 42.3682 1.4539 39.1286 45.6077 6.4790
## 3 1962 45.3630 45.7262 1.4203 42.5616 48.8909 6.3293
## 4 1967 47.8380 49.0843 1.3946 45.9770 52.1917 6.2147
## 5 1972 50.2540 52.4424 1.3772 49.3738 55.5109 6.1371
## 6 1977 55.7640 55.8005 1.3684 52.7515 58.8494 6.0979
## 7 1982 58.8160 59.1585 1.3684 56.1096 62.2075 6.0979
## 8 1987 62.8200 62.5166 1.3772 59.4481 65.5852 6.1371
## 9 1992 67.6620 65.8747 1.3946 62.7673 68.9820 6.2147
## 10 1997 70.6720 69.2328 1.4203 66.0681 72.3974 6.3293
## 11 2002 73.0170 72.5908 1.4539 69.3513 75.8304 6.4790
## 12 2007 74.2490 75.9489 1.4948 72.6182 79.2796 6.6614
##
## ----------------------------------
## Plot 1: Distribution of Residuals
## Plot 2: Residuals vs Fitted Values
## ----------------------------------
par(mfrow = c(2, 2))
plot(m1)
lifeExp = -1271.98 + 0.67*year
#### Dữ liệu
year <- c(1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 2007)
lifeExp <- c(40.4, 42.9, 45.4, 47.8, 50.3, 55.8, 58.8, 62.8, 67.7, 70.7, 73.0, 74.2)
#### Đưa vào data frame
df <- data.frame(year, lifeExp)
#### Xem dữ liệu
print(df)
## year lifeExp
## 1 1952 40.4
## 2 1957 42.9
## 3 1962 45.4
## 4 1967 47.8
## 5 1972 50.3
## 6 1977 55.8
## 7 1982 58.8
## 8 1987 62.8
## 9 1992 67.7
## 10 1997 70.7
## 11 2002 73.0
## 12 2007 74.2
#### Biểu đồ xu hướng
plot(year, lifeExp, type = "b", pch = 19, col = "blue",
main = "Xu hướng gia tăng tuổi thọ (1952–2007)",
xlab = "Năm", ylab = "Tuổi thọ trung bình")
#### Hồi quy tuyến tính
m1 <- lm(lifeExp ~ year, data = df)
abline(m1, col = "red", lwd = 2)
#### Kết quả hồi quy
summary(m1)
##
## Call:
## lm(formula = lifeExp ~ year, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1494 -0.5944 0.1387 0.7324 1.8268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1271.13492 43.77173 -29.04 0.0000000000547 ***
## year 0.67119 0.02211 30.35 0.0000000000353 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.322 on 10 degrees of freedom
## Multiple R-squared: 0.9893, Adjusted R-squared: 0.9882
## F-statistic: 921.4 on 1 and 10 DF, p-value: 0.00000000003527
#### Dự báo mức tăng trung bình mỗi năm
slope <- coef(m1)[2]
cat("Mức tăng tuổi thọ trung bình mỗi năm:", round(slope, 3), "năm\n")
## Mức tăng tuổi thọ trung bình mỗi năm: 0.671 năm
#### Tính tổng mức tăng từ 1952 đến 2007
increase_total <- slope * (2007 - 1952)
cat("Tổng mức tăng ước lượng (1952–2007):", round(increase_total, 1), "năm\n")
## Tổng mức tăng ước lượng (1952–2007): 36.9 năm
#### Dự báo năm 2015 chẳng hạn
predict(m1, newdata = data.frame(year = 2015))
## 1
## 81.31054
Y = c(12.1, 11.9, 10.2, 8.0, 7.7, 5.3, 7.9, 7.8, 5.5, 2.6)
X1 = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
X2 = c(7, 4, 4, 6, 4, 2, 1, 1, 1, 0)
df = data.frame(Y, X1, X2)
dim(df)
## [1] 10 3
head(df)
## Y X1 X2
## 1 12.1 0 7
## 2 11.9 1 4
## 3 10.2 2 4
## 4 8.0 3 6
## 5 7.7 4 4
## 6 5.3 5 2
m3 = lm(Y ~ X1, data = df)
summary(m3)
##
## Call:
## lm(formula = Y ~ X1, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1606 -1.0735 0.1742 0.8621 2.0970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.8545 0.8283 14.312 0.000000554 ***
## X1 -0.8788 0.1552 -5.664 0.000474 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.409 on 8 degrees of freedom
## Multiple R-squared: 0.8004, Adjusted R-squared: 0.7755
## F-statistic: 32.08 on 1 and 8 DF, p-value: 0.0004737
m4 = lm(Y ~ X2, data = df)
summary(m4)
##
## Call:
## lm(formula = Y ~ X2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.702 -1.533 -0.034 1.667 3.066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0980 1.1222 4.543 0.00189 **
## X2 0.9340 0.2999 3.114 0.01436 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.121 on 8 degrees of freedom
## Multiple R-squared: 0.548, Adjusted R-squared: 0.4915
## F-statistic: 9.698 on 1 and 8 DF, p-value: 0.01436
m5 = lm(Y ~ X1 + X2, data = df)
summary(m5)
##
## Call:
## lm(formula = Y ~ X1 + X2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.46078 -0.33384 0.00026 0.81856 1.98476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.7076 2.9785 4.938 0.00168 **
## X1 -1.2042 0.3614 -3.332 0.01255 *
## X2 -0.4629 0.4642 -0.997 0.35187
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.41 on 7 degrees of freedom
## Multiple R-squared: 0.8252, Adjusted R-squared: 0.7753
## F-statistic: 16.53 on 2 and 7 DF, p-value: 0.002232