library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(gtsummary)
library(readstata13)
# Read the CGSS 2021 Stata file into a data frame named CGSS.
# NOTE(review): absolute, machine-specific path — confirm it exists before
# running this script on another machine.
CGSS <- read.dta13(
  file = "D:/cgss2021/cgss2021/CGSS2021.dta"
)
## Warning in read.dta13("D:/cgss2021/cgss2021/CGSS2021.dta"):
## Duplicated factor levels for variables
##
## XA1, XA6, XB1, XB6, XC1, XC6, isco08_a57e,
## isco08_a59d, isco08_a60d, isco08_sp,
## isco08_f, isco08_m
##
## Unique labels for these variables have been generated.
## Warning in read.dta13("D:/cgss2021/cgss2021/CGSS2021.dta"):
## Factor codes of type double or float detected in variables
##
## type, A8a, A8b, L2, L4b, L5, L8a, L8b, L9,
## A60m, A62, A75a, A75b
##
## No labels have been assigned.
## Set option 'nonint.factors = TRUE' to assign labels anyway.
## Warning in read.dta13("D:/cgss2021/cgss2021/CGSS2021.dta"):
## Missing factor labels for variables
##
## P18
##
## No labels have been assigned.
## Set option 'generate.factors=TRUE' to generate labels.
# Recode values of 800 or more in A13 (height) and A14 (weight) to NA.
# presumably these are non-response codes — TODO confirm against the codebook
CGSS$A13 <- replace(CGSS$A13, CGSS$A13 >= 800, NA)
CGSS$A14 <- replace(CGSS$A14, CGSS$A14 >= 800, NA)
# Plain scatter plot of A13 (x) against A14 (y).
ggplot(CGSS, aes(x = A13, y = A14)) +
  geom_point()
## Warning: Removed 179 rows containing missing values (`geom_point()`).

# Same scatter plot, with point colour mapped to A2 at the plot level.
ggplot(CGSS, aes(x = A13, y = A14, color = A2)) +
  geom_point()
## Warning: Removed 179 rows containing missing values (`geom_point()`).

# Colour is mapped at the plot level, so both the points and the linear
# smoother inherit it and a separate fit is drawn for each A2 group.
ggplot(CGSS, aes(x = A13, y = A14, color = A2)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 179 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 179 rows containing missing values (`geom_point()`).

# Colour is mapped only inside geom_point(), so the smoother sees no
# grouping and draws a single linear fit over all rows.
ggplot(CGSS, aes(x = A13, y = A14)) +
  geom_point(aes(color = A2)) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 179 rows containing non-finite values (`stat_smooth()`).
## Removed 179 rows containing missing values (`geom_point()`).

# Points carry both colour and shape for A2; the linear fit stays pooled
# because those aesthetics are local to the point layer.
ggplot(CGSS, aes(x = A13, y = A14)) +
  geom_point(aes(color = A2, shape = A2)) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 179 rows containing non-finite values (`stat_smooth()`).
## Removed 179 rows containing missing values (`geom_point()`).

# Scatter plot of A13 (height) against A14 (weight) with points coloured and
# shaped by A2, plus one pooled linear fit. The title, subtitle and axis
# labels describe the CGSS variables actually plotted (height on x, weight
# on y).
# NOTE(review): measurement units are not visible in this script, so the axis
# labels carry none — add them once confirmed against the codebook.
ggplot(
  data = CGSS,
  mapping = aes(x = A13, y = A14)
) +
  geom_point(aes(color = A2, shape = A2)) +
  geom_smooth(method = "lm") +
  labs(
    title = "Height and weight",
    subtitle = "CGSS 2021",
    x = "Height", y = "Weight",
    color = "A2", shape = "A2"
  ) +
  # Colour-blind-safe discrete palette from ggthemes.
  scale_color_colorblind()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 179 rows containing non-finite values (`stat_smooth()`).
## Removed 179 rows containing missing values (`geom_point()`).

# Labelled A13-vs-A14 scatter plot with Chinese title and axis labels;
# points are coloured and shaped by A2 and the linear fit is pooled.
ggplot(CGSS, aes(x = A13, y = A14)) +
  geom_point(mapping = aes(color = A2, shape = A2)) +
  geom_smooth(method = "lm") +
  labs(
    title = "身高与体重",
    subtitle = " CGSS2021",
    x = "身高",
    y = "体重",
    color = "A2",
    shape = "A2"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 179 rows containing non-finite values (`stat_smooth()`).
## Removed 179 rows containing missing values (`geom_point()`).

# Keep only the analysis variables, renaming each questionnaire code to a
# readable column name (new_name = original_code).
cgss2021 <- select(
  CGSS,
  stype     = A27d,
  gender    = A2,
  birth     = A3_1,
  height    = A13,
  weight    = A14,
  edu       = A7a,
  poli      = A10,
  income    = A8a,
  famincome = A62,
  health    = A15,
  happy     = A36
)
# Height-vs-weight scatter plot drawn from the renamed subset; points are
# coloured and shaped by gender, with one pooled linear fit.
ggplot(cgss2021, aes(x = height, y = weight)) +
  geom_point(mapping = aes(color = gender, shape = gender)) +
  geom_smooth(method = "lm") +
  labs(
    title = "身高与体重",
    subtitle = " CGSS2021",
    x = "身高",
    y = "体重"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 179 rows containing non-finite values (`stat_smooth()`).
## Removed 179 rows containing missing values (`geom_point()`).

# Derive age as 2021 minus the birth year, then summarise every column.
cgss2021 <- mutate(cgss2021, age = 2021 - birth)
# gtsummary must be installed first: install.packages("gtsummary")
cgss2021 %>%
  tbl_summary()
| Characteristic | N = 8,148 |
|---|---|
| stype | |
| 农村 | 5,931 (73%) |
| 乡镇 | 533 (6.5%) |
| 县城 | 535 (6.6%) |
| 城郊 | 76 (0.9%) |
| 城市市区 | 1,019 (13%) |
| 境外 | 2 (<0.1%) |
| 其他(请注明) | 9 (0.1%) |
| 不知道 | 41 (0.5%) |
| 拒绝回答 | 2 (<0.1%) |
| gender | |
| 男 | 3,679 (45%) |
| 女 | 4,469 (55%) |
| birth | 1,968 (1,955, 1,984) |
| height | 163 (158, 170) |
| Unknown | 144 |
| weight | 120 (108, 140) |
| Unknown | 66 |
| edu | |
| 没有受过任何教育 | 858 (11%) |
| 私塾、扫盲班 | 47 (0.6%) |
| 小学 | 1,751 (22%) |
| 初中 | 2,311 (28%) |
| 职业高中 | 127 (1.6%) |
| 普通高中 | 960 (12%) |
| 中专 | 374 (4.6%) |
| 技校 | 28 (0.3%) |
| 大学专科(成人高等教育) | 261 (3.2%) |
| 大学专科(正规高等教育) | 361 (4.4%) |
| 大学本科(成人高等教育) | 194 (2.4%) |
| 大学本科(正规高等教育) | 738 (9.1%) |
| 研究生及以上 | 117 (1.4%) |
| 其他(请注明) | 0 (0%) |
| Unknown | 21 |
| poli | |
| 群众 | 6,557 (80%) |
| 共青团员 | 606 (7.4%) |
| 民主党派 | 6 (<0.1%) |
| 共产党员 | 966 (12%) |
| 不知道 | 12 (0.1%) |
| 拒绝回答 | 1 (<0.1%) |
| income | 30,000 (3,000, 70,000) |
| famincome | 80,000 (30,000, 300,000) |
| health | |
| 很不健康 | 429 (5.3%) |
| 比较不健康 | 1,069 (13%) |
| 一般 | 2,288 (28%) |
| 比较健康 | 2,864 (35%) |
| 很健康 | 1,492 (18%) |
| 不知道 | 6 (<0.1%) |
| 拒绝回答 | 0 (0%) |
| happy | |
| 非常不幸福 | 75 (1.4%) |
| 比较不幸福 | 243 (4.5%) |
| 说不上幸福不幸福 | 698 (13%) |
| 比较幸福 | 3,113 (57%) |
| 非常幸福 | 1,319 (24%) |
| 不知道 | 7 (0.1%) |
| 拒绝回答 | 3 (<0.1%) |
| Unknown | 2,690 |
| age | 53 (37, 66) |
# Narrow the data to gender, age and happiness, then summarise that subset.
trial2 <- select(cgss2021, gender, age, happy)
tbl_summary(trial2)
| Characteristic | N = 8,148 |
|---|---|
| gender | |
| 男 | 3,679 (45%) |
| 女 | 4,469 (55%) |
| age | 53 (37, 66) |
| happy | |
| 非常不幸福 | 75 (1.4%) |
| 比较不幸福 | 243 (4.5%) |
| 说不上幸福不幸福 | 698 (13%) |
| 比较幸福 | 3,113 (57%) |
| 非常幸福 | 1,319 (24%) |
| 不知道 | 7 (0.1%) |
| 拒绝回答 | 3 (<0.1%) |
| Unknown | 2,690 |
# Same summary, split into one column per gender level.
tbl_summary(trial2, by = gender)
| Characteristic | 男, N = 3,679 | 女, N = 4,469 |
|---|---|---|
| age | 54 (38, 67) | 52 (37, 65) |
| happy | | |
| 非常不幸福 | 29 (1.2%) | 46 (1.5%) |
| 比较不幸福 | 103 (4.1%) | 140 (4.7%) |
| 说不上幸福不幸福 | 349 (14%) | 349 (12%) |
| 比较幸福 | 1,369 (55%) | 1,744 (59%) |
| 非常幸福 | 636 (26%) | 683 (23%) |
| 不知道 | 2 (<0.1%) | 5 (0.2%) |
| 拒绝回答 | 0 (0%) | 3 (0.1%) |
| Unknown | 1,191 | 1,499 |