# Load the birth-weight data set from a CSV file in the working directory.
bw <- read.csv(file = "birthwt.csv")
###4.1 Có bao nhiêu biến số (variable) và quan sát (observation)
# Size of bw: number of observations (rows), then number of variables (columns).
c(nrow(bw), ncol(bw))
## [1] 189 11
# Preview the first six rows.
head(bw, n = 6)
## id low age lwt race smoke ptl ht ui ftv bwt
## 1 85 0 19 182 2 0 0 0 1 0 2523
## 2 86 0 33 155 3 0 0 0 0 3 2551
## 3 87 0 20 105 1 1 0 0 0 1 2557
## 4 88 0 21 108 1 1 0 0 1 2 2594
## 5 89 0 18 107 1 1 0 0 1 0 2600
## 6 91 0 21 124 3 0 0 0 0 0 2622
# Show the last 10 rows of bw.
tail(bw, n = 10)
## id low age lwt race smoke ptl ht ui ftv bwt
## 180 71 1 17 120 2 0 0 0 0 2 2438
## 181 75 1 26 154 3 0 1 1 0 1 2442
## 182 76 1 20 105 3 0 0 0 0 3 2450
## 183 77 1 26 190 1 1 0 0 0 0 2466
## 184 78 1 14 101 3 1 1 0 0 0 2466
## 185 79 1 28 95 1 1 0 0 0 2 2466
## 186 81 1 14 100 3 0 0 0 0 2 2495
## 187 82 1 23 94 3 1 0 0 0 0 2495
## 188 83 1 17 142 2 0 0 1 0 0 2495
## 189 84 1 21 130 1 1 0 1 0 3 2495
##Việc 5: Biên tập dữ liệu
###5.1 Tạo biến số mới mwt là cân nặng của mẹ tính bằng kg
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Add mwt: lwt multiplied by 0.453592 (the pound-to-kilogram factor, so lwt is
# presumably recorded in pounds) and rounded to two decimals, then print the
# resulting data frame.
# NOTE(review): the result is only printed, never assigned, so bw itself does
# not gain an mwt column. Assign it (bw <- ...) if mwt is needed later.
mutate(bw, mwt = round(lwt * 0.453592, digits = 2))
## id low age lwt race smoke ptl ht ui ftv bwt mwt
## 1 85 0 19 182 2 0 0 0 1 0 2523 82.55
## 2 86 0 33 155 3 0 0 0 0 3 2551 70.31
## 3 87 0 20 105 1 1 0 0 0 1 2557 47.63
## 4 88 0 21 108 1 1 0 0 1 2 2594 48.99
## 5 89 0 18 107 1 1 0 0 1 0 2600 48.53
## 6 91 0 21 124 3 0 0 0 0 0 2622 56.25
## 7 92 0 22 118 1 0 0 0 0 1 2637 53.52
## 8 93 0 17 103 3 0 0 0 0 1 2637 46.72
## 9 94 0 29 123 1 1 0 0 0 1 2663 55.79
## 10 95 0 26 113 1 1 0 0 0 0 2665 51.26
## 11 96 0 19 95 3 0 0 0 0 0 2722 43.09
## 12 97 0 19 150 3 0 0 0 0 1 2733 68.04
## 13 98 0 22 95 3 0 0 1 0 0 2751 43.09
## 14 99 0 30 107 3 0 1 0 1 2 2750 48.53
## 15 100 0 18 100 1 1 0 0 0 0 2769 45.36
## 16 101 0 18 100 1 1 0 0 0 0 2769 45.36
## 17 102 0 15 98 2 0 0 0 0 0 2778 44.45
## 18 103 0 25 118 1 1 0 0 0 3 2782 53.52
## 19 104 0 20 120 3 0 0 0 1 0 2807 54.43
## 20 105 0 28 120 1 1 0 0 0 1 2821 54.43
## 21 106 0 32 121 3 0 0 0 0 2 2835 54.88
## 22 107 0 31 100 1 0 0 0 1 3 2835 45.36
## 23 108 0 36 202 1 0 0 0 0 1 2836 91.63
## 24 109 0 28 120 3 0 0 0 0 0 2863 54.43
## 25 111 0 25 120 3 0 0 0 1 2 2877 54.43
## 26 112 0 28 167 1 0 0 0 0 0 2877 75.75
## 27 113 0 17 122 1 1 0 0 0 0 2906 55.34
## 28 114 0 29 150 1 0 0 0 0 2 2920 68.04
## 29 115 0 26 168 2 1 0 0 0 0 2920 76.20
## 30 116 0 17 113 2 0 0 0 0 1 2920 51.26
## 31 117 0 17 113 2 0 0 0 0 1 2920 51.26
## 32 118 0 24 90 1 1 1 0 0 1 2948 40.82
## 33 119 0 35 121 2 1 1 0 0 1 2948 54.88
## 34 120 0 25 155 1 0 0 0 0 1 2977 70.31
## 35 121 0 25 125 2 0 0 0 0 0 2977 56.70
## 36 123 0 29 140 1 1 0 0 0 2 2977 63.50
## 37 124 0 19 138 1 1 0 0 0 2 2977 62.60
## 38 125 0 27 124 1 1 0 0 0 0 2922 56.25
## 39 126 0 31 215 1 1 0 0 0 2 3005 97.52
## 40 127 0 33 109 1 1 0 0 0 1 3033 49.44
## 41 128 0 21 185 2 1 0 0 0 2 3042 83.91
## 42 129 0 19 189 1 0 0 0 0 2 3062 85.73
## 43 130 0 23 130 2 0 0 0 0 1 3062 58.97
## 44 131 0 21 160 1 0 0 0 0 0 3062 72.57
## 45 132 0 18 90 1 1 0 0 1 0 3062 40.82
## 46 133 0 18 90 1 1 0 0 1 0 3062 40.82
## 47 134 0 32 132 1 0 0 0 0 4 3080 59.87
## 48 135 0 19 132 3 0 0 0 0 0 3090 59.87
## 49 136 0 24 115 1 0 0 0 0 2 3090 52.16
## 50 137 0 22 85 3 1 0 0 0 0 3090 38.56
## 51 138 0 22 120 1 0 0 1 0 1 3100 54.43
## 52 139 0 23 128 3 0 0 0 0 0 3104 58.06
## 53 140 0 22 130 1 1 0 0 0 0 3132 58.97
## 54 141 0 30 95 1 1 0 0 0 2 3147 43.09
## 55 142 0 19 115 3 0 0 0 0 0 3175 52.16
## 56 143 0 16 110 3 0 0 0 0 0 3175 49.90
## 57 144 0 21 110 3 1 0 0 1 0 3203 49.90
## 58 145 0 30 153 3 0 0 0 0 0 3203 69.40
## 59 146 0 20 103 3 0 0 0 0 0 3203 46.72
## 60 147 0 17 119 3 0 0 0 0 0 3225 53.98
## 61 148 0 17 119 3 0 0 0 0 0 3225 53.98
## 62 149 0 23 119 3 0 0 0 0 2 3232 53.98
## 63 150 0 24 110 3 0 0 0 0 0 3232 49.90
## 64 151 0 28 140 1 0 0 0 0 0 3234 63.50
## 65 154 0 26 133 3 1 2 0 0 0 3260 60.33
## 66 155 0 20 169 3 0 1 0 1 1 3274 76.66
## 67 156 0 24 115 3 0 0 0 0 2 3274 52.16
## 68 159 0 28 250 3 1 0 0 0 6 3303 113.40
## 69 160 0 20 141 1 0 2 0 1 1 3317 63.96
## 70 161 0 22 158 2 0 1 0 0 2 3317 71.67
## 71 162 0 22 112 1 1 2 0 0 0 3317 50.80
## 72 163 0 31 150 3 1 0 0 0 2 3321 68.04
## 73 164 0 23 115 3 1 0 0 0 1 3331 52.16
## 74 166 0 16 112 2 0 0 0 0 0 3374 50.80
## 75 167 0 16 135 1 1 0 0 0 0 3374 61.23
## 76 168 0 18 229 2 0 0 0 0 0 3402 103.87
## 77 169 0 25 140 1 0 0 0 0 1 3416 63.50
## 78 170 0 32 134 1 1 1 0 0 4 3430 60.78
## 79 172 0 20 121 2 1 0 0 0 0 3444 54.88
## 80 173 0 23 190 1 0 0 0 0 0 3459 86.18
## 81 174 0 22 131 1 0 0 0 0 1 3460 59.42
## 82 175 0 32 170 1 0 0 0 0 0 3473 77.11
## 83 176 0 30 110 3 0 0 0 0 0 3544 49.90
## 84 177 0 20 127 3 0 0 0 0 0 3487 57.61
## 85 179 0 23 123 3 0 0 0 0 0 3544 55.79
## 86 180 0 17 120 3 1 0 0 0 0 3572 54.43
## 87 181 0 19 105 3 0 0 0 0 0 3572 47.63
## 88 182 0 23 130 1 0 0 0 0 0 3586 58.97
## 89 183 0 36 175 1 0 0 0 0 0 3600 79.38
## 90 184 0 22 125 1 0 0 0 0 1 3614 56.70
## 91 185 0 24 133 1 0 0 0 0 0 3614 60.33
## 92 186 0 21 134 3 0 0 0 0 2 3629 60.78
## 93 187 0 19 235 1 1 0 1 0 0 3629 106.59
## 94 188 0 25 95 1 1 3 0 1 0 3637 43.09
## 95 189 0 16 135 1 1 0 0 0 0 3643 61.23
## 96 190 0 29 135 1 0 0 0 0 1 3651 61.23
## 97 191 0 29 154 1 0 0 0 0 1 3651 69.85
## 98 192 0 19 147 1 1 0 0 0 0 3651 66.68
## 99 193 0 19 147 1 1 0 0 0 0 3651 66.68
## 100 195 0 30 137 1 0 0 0 0 1 3699 62.14
## 101 196 0 24 110 1 0 0 0 0 1 3728 49.90
## 102 197 0 19 184 1 1 0 1 0 0 3756 83.46
## 103 199 0 24 110 3 0 1 0 0 0 3770 49.90
## 104 200 0 23 110 1 0 0 0 0 1 3770 49.90
## 105 201 0 20 120 3 0 0 0 0 0 3770 54.43
## 106 202 0 25 241 2 0 0 1 0 0 3790 109.32
## 107 203 0 30 112 1 0 0 0 0 1 3799 50.80
## 108 204 0 22 169 1 0 0 0 0 0 3827 76.66
## 109 205 0 18 120 1 1 0 0 0 2 3856 54.43
## 110 206 0 16 170 2 0 0 0 0 4 3860 77.11
## 111 207 0 32 186 1 0 0 0 0 2 3860 84.37
## 112 208 0 18 120 3 0 0 0 0 1 3884 54.43
## 113 209 0 29 130 1 1 0 0 0 2 3884 58.97
## 114 210 0 33 117 1 0 0 0 1 1 3912 53.07
## 115 211 0 20 170 1 1 0 0 0 0 3940 77.11
## 116 212 0 28 134 3 0 0 0 0 1 3941 60.78
## 117 213 0 14 135 1 0 0 0 0 0 3941 61.23
## 118 214 0 28 130 3 0 0 0 0 0 3969 58.97
## 119 215 0 25 120 1 0 0 0 0 2 3983 54.43
## 120 216 0 16 95 3 0 0 0 0 1 3997 43.09
## 121 217 0 20 158 1 0 0 0 0 1 3997 71.67
## 122 218 0 26 160 3 0 0 0 0 0 4054 72.57
## 123 219 0 21 115 1 0 0 0 0 1 4054 52.16
## 124 220 0 22 129 1 0 0 0 0 0 4111 58.51
## 125 221 0 25 130 1 0 0 0 0 2 4153 58.97
## 126 222 0 31 120 1 0 0 0 0 2 4167 54.43
## 127 223 0 35 170 1 0 1 0 0 1 4174 77.11
## 128 224 0 19 120 1 1 0 0 0 0 4238 54.43
## 129 225 0 24 116 1 0 0 0 0 1 4593 52.62
## 130 226 0 45 123 1 0 0 0 0 1 4990 55.79
## 131 4 1 28 120 3 1 1 0 1 0 709 54.43
## 132 10 1 29 130 1 0 0 0 1 2 1021 58.97
## 133 11 1 34 187 2 1 0 1 0 0 1135 84.82
## 134 13 1 25 105 3 0 1 1 0 0 1330 47.63
## 135 15 1 25 85 3 0 0 0 1 0 1474 38.56
## 136 16 1 27 150 3 0 0 0 0 0 1588 68.04
## 137 17 1 23 97 3 0 0 0 1 1 1588 44.00
## 138 18 1 24 128 2 0 1 0 0 1 1701 58.06
## 139 19 1 24 132 3 0 0 1 0 0 1729 59.87
## 140 20 1 21 165 1 1 0 1 0 1 1790 74.84
## 141 22 1 32 105 1 1 0 0 0 0 1818 47.63
## 142 23 1 19 91 1 1 2 0 1 0 1885 41.28
## 143 24 1 25 115 3 0 0 0 0 0 1893 52.16
## 144 25 1 16 130 3 0 0 0 0 1 1899 58.97
## 145 26 1 25 92 1 1 0 0 0 0 1928 41.73
## 146 27 1 20 150 1 1 0 0 0 2 1928 68.04
## 147 28 1 21 200 2 0 0 0 1 2 1928 90.72
## 148 29 1 24 155 1 1 1 0 0 0 1936 70.31
## 149 30 1 21 103 3 0 0 0 0 0 1970 46.72
## 150 31 1 20 125 3 0 0 0 1 0 2055 56.70
## 151 32 1 25 89 3 0 2 0 0 1 2055 40.37
## 152 33 1 19 102 1 0 0 0 0 2 2082 46.27
## 153 34 1 19 112 1 1 0 0 1 0 2084 50.80
## 154 35 1 26 117 1 1 1 0 0 0 2084 53.07
## 155 36 1 24 138 1 0 0 0 0 0 2100 62.60
## 156 37 1 17 130 3 1 1 0 1 0 2125 58.97
## 157 40 1 20 120 2 1 0 0 0 3 2126 54.43
## 158 42 1 22 130 1 1 1 0 1 1 2187 58.97
## 159 43 1 27 130 2 0 0 0 1 0 2187 58.97
## 160 44 1 20 80 3 1 0 0 1 0 2211 36.29
## 161 45 1 17 110 1 1 0 0 0 0 2225 49.90
## 162 46 1 25 105 3 0 1 0 0 1 2240 47.63
## 163 47 1 20 109 3 0 0 0 0 0 2240 49.44
## 164 49 1 18 148 3 0 0 0 0 0 2282 67.13
## 165 50 1 18 110 2 1 1 0 0 0 2296 49.90
## 166 51 1 20 121 1 1 1 0 1 0 2296 54.88
## 167 52 1 21 100 3 0 1 0 0 4 2301 45.36
## 168 54 1 26 96 3 0 0 0 0 0 2325 43.54
## 169 56 1 31 102 1 1 1 0 0 1 2353 46.27
## 170 57 1 15 110 1 0 0 0 0 0 2353 49.90
## 171 59 1 23 187 2 1 0 0 0 1 2367 84.82
## 172 60 1 20 122 2 1 0 0 0 0 2381 55.34
## 173 61 1 24 105 2 1 0 0 0 0 2381 47.63
## 174 62 1 15 115 3 0 0 0 1 0 2381 52.16
## 175 63 1 23 120 3 0 0 0 0 0 2410 54.43
## 176 65 1 30 142 1 1 1 0 0 0 2410 64.41
## 177 67 1 22 130 1 1 0 0 0 1 2410 58.97
## 178 68 1 17 120 1 1 0 0 0 3 2414 54.43
## 179 69 1 23 110 1 1 1 0 0 0 2424 49.90
## 180 71 1 17 120 2 0 0 0 0 2 2438 54.43
## 181 75 1 26 154 3 0 1 1 0 1 2442 69.85
## 182 76 1 20 105 3 0 0 0 0 3 2450 47.63
## 183 77 1 26 190 1 1 0 0 0 0 2466 86.18
## 184 78 1 14 101 3 1 1 0 0 0 2466 45.81
## 185 79 1 28 95 1 1 0 0 0 2 2466 43.09
## 186 81 1 14 100 3 0 0 0 0 2 2495 45.36
## 187 82 1 23 94 3 1 0 0 0 0 2495 42.64
## 188 83 1 17 142 2 0 0 1 0 0 2495 64.41
## 189 84 1 21 130 1 1 0 1 0 3 2495 58.97
###5.2 Tạo biến số mới ethnicity là biến factor với điều kiện sau:
# - Nếu race = 1 thì ethnicity = "White"
# - Nếu race = 2 thì ethnicity = "Black"
# - Nếu race = 3 thì ethnicity = "Other"
# Derive ethnicity as a factor from the numeric race code and store it in bw:
#   1 -> "White", 2 -> "Black", 3 -> "Other".
# factor() with explicit levels/labels fixes the level order and maps any
# race code outside 1..3 to NA instead of silently creating an extra level.
bw <- bw %>%
  mutate(
    ethnicity = factor(
      race,
      levels = c(1, 2, 3),
      labels = c("White", "Black", "Other")
    )
  )
# Print the updated data frame to inspect the new column.
bw
## id low age lwt race smoke ptl ht ui ftv bwt ethnicity
## 1 85 0 19 182 2 0 0 0 1 0 2523 Black
## 2 86 0 33 155 3 0 0 0 0 3 2551 Other
## 3 87 0 20 105 1 1 0 0 0 1 2557 White
## 4 88 0 21 108 1 1 0 0 1 2 2594 White
## 5 89 0 18 107 1 1 0 0 1 0 2600 White
## 6 91 0 21 124 3 0 0 0 0 0 2622 Other
## 7 92 0 22 118 1 0 0 0 0 1 2637 White
## 8 93 0 17 103 3 0 0 0 0 1 2637 Other
## 9 94 0 29 123 1 1 0 0 0 1 2663 White
## 10 95 0 26 113 1 1 0 0 0 0 2665 White
## 11 96 0 19 95 3 0 0 0 0 0 2722 Other
## 12 97 0 19 150 3 0 0 0 0 1 2733 Other
## 13 98 0 22 95 3 0 0 1 0 0 2751 Other
## 14 99 0 30 107 3 0 1 0 1 2 2750 Other
## 15 100 0 18 100 1 1 0 0 0 0 2769 White
## 16 101 0 18 100 1 1 0 0 0 0 2769 White
## 17 102 0 15 98 2 0 0 0 0 0 2778 Black
## 18 103 0 25 118 1 1 0 0 0 3 2782 White
## 19 104 0 20 120 3 0 0 0 1 0 2807 Other
## 20 105 0 28 120 1 1 0 0 0 1 2821 White
## 21 106 0 32 121 3 0 0 0 0 2 2835 Other
## 22 107 0 31 100 1 0 0 0 1 3 2835 White
## 23 108 0 36 202 1 0 0 0 0 1 2836 White
## 24 109 0 28 120 3 0 0 0 0 0 2863 Other
## 25 111 0 25 120 3 0 0 0 1 2 2877 Other
## 26 112 0 28 167 1 0 0 0 0 0 2877 White
## 27 113 0 17 122 1 1 0 0 0 0 2906 White
## 28 114 0 29 150 1 0 0 0 0 2 2920 White
## 29 115 0 26 168 2 1 0 0 0 0 2920 Black
## 30 116 0 17 113 2 0 0 0 0 1 2920 Black
## 31 117 0 17 113 2 0 0 0 0 1 2920 Black
## 32 118 0 24 90 1 1 1 0 0 1 2948 White
## 33 119 0 35 121 2 1 1 0 0 1 2948 Black
## 34 120 0 25 155 1 0 0 0 0 1 2977 White
## 35 121 0 25 125 2 0 0 0 0 0 2977 Black
## 36 123 0 29 140 1 1 0 0 0 2 2977 White
## 37 124 0 19 138 1 1 0 0 0 2 2977 White
## 38 125 0 27 124 1 1 0 0 0 0 2922 White
## 39 126 0 31 215 1 1 0 0 0 2 3005 White
## 40 127 0 33 109 1 1 0 0 0 1 3033 White
## 41 128 0 21 185 2 1 0 0 0 2 3042 Black
## 42 129 0 19 189 1 0 0 0 0 2 3062 White
## 43 130 0 23 130 2 0 0 0 0 1 3062 Black
## 44 131 0 21 160 1 0 0 0 0 0 3062 White
## 45 132 0 18 90 1 1 0 0 1 0 3062 White
## 46 133 0 18 90 1 1 0 0 1 0 3062 White
## 47 134 0 32 132 1 0 0 0 0 4 3080 White
## 48 135 0 19 132 3 0 0 0 0 0 3090 Other
## 49 136 0 24 115 1 0 0 0 0 2 3090 White
## 50 137 0 22 85 3 1 0 0 0 0 3090 Other
## 51 138 0 22 120 1 0 0 1 0 1 3100 White
## 52 139 0 23 128 3 0 0 0 0 0 3104 Other
## 53 140 0 22 130 1 1 0 0 0 0 3132 White
## 54 141 0 30 95 1 1 0 0 0 2 3147 White
## 55 142 0 19 115 3 0 0 0 0 0 3175 Other
## 56 143 0 16 110 3 0 0 0 0 0 3175 Other
## 57 144 0 21 110 3 1 0 0 1 0 3203 Other
## 58 145 0 30 153 3 0 0 0 0 0 3203 Other
## 59 146 0 20 103 3 0 0 0 0 0 3203 Other
## 60 147 0 17 119 3 0 0 0 0 0 3225 Other
## 61 148 0 17 119 3 0 0 0 0 0 3225 Other
## 62 149 0 23 119 3 0 0 0 0 2 3232 Other
## 63 150 0 24 110 3 0 0 0 0 0 3232 Other
## 64 151 0 28 140 1 0 0 0 0 0 3234 White
## 65 154 0 26 133 3 1 2 0 0 0 3260 Other
## 66 155 0 20 169 3 0 1 0 1 1 3274 Other
## 67 156 0 24 115 3 0 0 0 0 2 3274 Other
## 68 159 0 28 250 3 1 0 0 0 6 3303 Other
## 69 160 0 20 141 1 0 2 0 1 1 3317 White
## 70 161 0 22 158 2 0 1 0 0 2 3317 Black
## 71 162 0 22 112 1 1 2 0 0 0 3317 White
## 72 163 0 31 150 3 1 0 0 0 2 3321 Other
## 73 164 0 23 115 3 1 0 0 0 1 3331 Other
## 74 166 0 16 112 2 0 0 0 0 0 3374 Black
## 75 167 0 16 135 1 1 0 0 0 0 3374 White
## 76 168 0 18 229 2 0 0 0 0 0 3402 Black
## 77 169 0 25 140 1 0 0 0 0 1 3416 White
## 78 170 0 32 134 1 1 1 0 0 4 3430 White
## 79 172 0 20 121 2 1 0 0 0 0 3444 Black
## 80 173 0 23 190 1 0 0 0 0 0 3459 White
## 81 174 0 22 131 1 0 0 0 0 1 3460 White
## 82 175 0 32 170 1 0 0 0 0 0 3473 White
## 83 176 0 30 110 3 0 0 0 0 0 3544 Other
## 84 177 0 20 127 3 0 0 0 0 0 3487 Other
## 85 179 0 23 123 3 0 0 0 0 0 3544 Other
## 86 180 0 17 120 3 1 0 0 0 0 3572 Other
## 87 181 0 19 105 3 0 0 0 0 0 3572 Other
## 88 182 0 23 130 1 0 0 0 0 0 3586 White
## 89 183 0 36 175 1 0 0 0 0 0 3600 White
## 90 184 0 22 125 1 0 0 0 0 1 3614 White
## 91 185 0 24 133 1 0 0 0 0 0 3614 White
## 92 186 0 21 134 3 0 0 0 0 2 3629 Other
## 93 187 0 19 235 1 1 0 1 0 0 3629 White
## 94 188 0 25 95 1 1 3 0 1 0 3637 White
## 95 189 0 16 135 1 1 0 0 0 0 3643 White
## 96 190 0 29 135 1 0 0 0 0 1 3651 White
## 97 191 0 29 154 1 0 0 0 0 1 3651 White
## 98 192 0 19 147 1 1 0 0 0 0 3651 White
## 99 193 0 19 147 1 1 0 0 0 0 3651 White
## 100 195 0 30 137 1 0 0 0 0 1 3699 White
## 101 196 0 24 110 1 0 0 0 0 1 3728 White
## 102 197 0 19 184 1 1 0 1 0 0 3756 White
## 103 199 0 24 110 3 0 1 0 0 0 3770 Other
## 104 200 0 23 110 1 0 0 0 0 1 3770 White
## 105 201 0 20 120 3 0 0 0 0 0 3770 Other
## 106 202 0 25 241 2 0 0 1 0 0 3790 Black
## 107 203 0 30 112 1 0 0 0 0 1 3799 White
## 108 204 0 22 169 1 0 0 0 0 0 3827 White
## 109 205 0 18 120 1 1 0 0 0 2 3856 White
## 110 206 0 16 170 2 0 0 0 0 4 3860 Black
## 111 207 0 32 186 1 0 0 0 0 2 3860 White
## 112 208 0 18 120 3 0 0 0 0 1 3884 Other
## 113 209 0 29 130 1 1 0 0 0 2 3884 White
## 114 210 0 33 117 1 0 0 0 1 1 3912 White
## 115 211 0 20 170 1 1 0 0 0 0 3940 White
## 116 212 0 28 134 3 0 0 0 0 1 3941 Other
## 117 213 0 14 135 1 0 0 0 0 0 3941 White
## 118 214 0 28 130 3 0 0 0 0 0 3969 Other
## 119 215 0 25 120 1 0 0 0 0 2 3983 White
## 120 216 0 16 95 3 0 0 0 0 1 3997 Other
## 121 217 0 20 158 1 0 0 0 0 1 3997 White
## 122 218 0 26 160 3 0 0 0 0 0 4054 Other
## 123 219 0 21 115 1 0 0 0 0 1 4054 White
## 124 220 0 22 129 1 0 0 0 0 0 4111 White
## 125 221 0 25 130 1 0 0 0 0 2 4153 White
## 126 222 0 31 120 1 0 0 0 0 2 4167 White
## 127 223 0 35 170 1 0 1 0 0 1 4174 White
## 128 224 0 19 120 1 1 0 0 0 0 4238 White
## 129 225 0 24 116 1 0 0 0 0 1 4593 White
## 130 226 0 45 123 1 0 0 0 0 1 4990 White
## 131 4 1 28 120 3 1 1 0 1 0 709 Other
## 132 10 1 29 130 1 0 0 0 1 2 1021 White
## 133 11 1 34 187 2 1 0 1 0 0 1135 Black
## 134 13 1 25 105 3 0 1 1 0 0 1330 Other
## 135 15 1 25 85 3 0 0 0 1 0 1474 Other
## 136 16 1 27 150 3 0 0 0 0 0 1588 Other
## 137 17 1 23 97 3 0 0 0 1 1 1588 Other
## 138 18 1 24 128 2 0 1 0 0 1 1701 Black
## 139 19 1 24 132 3 0 0 1 0 0 1729 Other
## 140 20 1 21 165 1 1 0 1 0 1 1790 White
## 141 22 1 32 105 1 1 0 0 0 0 1818 White
## 142 23 1 19 91 1 1 2 0 1 0 1885 White
## 143 24 1 25 115 3 0 0 0 0 0 1893 Other
## 144 25 1 16 130 3 0 0 0 0 1 1899 Other
## 145 26 1 25 92 1 1 0 0 0 0 1928 White
## 146 27 1 20 150 1 1 0 0 0 2 1928 White
## 147 28 1 21 200 2 0 0 0 1 2 1928 Black
## 148 29 1 24 155 1 1 1 0 0 0 1936 White
## 149 30 1 21 103 3 0 0 0 0 0 1970 Other
## 150 31 1 20 125 3 0 0 0 1 0 2055 Other
## 151 32 1 25 89 3 0 2 0 0 1 2055 Other
## 152 33 1 19 102 1 0 0 0 0 2 2082 White
## 153 34 1 19 112 1 1 0 0 1 0 2084 White
## 154 35 1 26 117 1 1 1 0 0 0 2084 White
## 155 36 1 24 138 1 0 0 0 0 0 2100 White
## 156 37 1 17 130 3 1 1 0 1 0 2125 Other
## 157 40 1 20 120 2 1 0 0 0 3 2126 Black
## 158 42 1 22 130 1 1 1 0 1 1 2187 White
## 159 43 1 27 130 2 0 0 0 1 0 2187 Black
## 160 44 1 20 80 3 1 0 0 1 0 2211 Other
## 161 45 1 17 110 1 1 0 0 0 0 2225 White
## 162 46 1 25 105 3 0 1 0 0 1 2240 Other
## 163 47 1 20 109 3 0 0 0 0 0 2240 Other
## 164 49 1 18 148 3 0 0 0 0 0 2282 Other
## 165 50 1 18 110 2 1 1 0 0 0 2296 Black
## 166 51 1 20 121 1 1 1 0 1 0 2296 White
## 167 52 1 21 100 3 0 1 0 0 4 2301 Other
## 168 54 1 26 96 3 0 0 0 0 0 2325 Other
## 169 56 1 31 102 1 1 1 0 0 1 2353 White
## 170 57 1 15 110 1 0 0 0 0 0 2353 White
## 171 59 1 23 187 2 1 0 0 0 1 2367 Black
## 172 60 1 20 122 2 1 0 0 0 0 2381 Black
## 173 61 1 24 105 2 1 0 0 0 0 2381 Black
## 174 62 1 15 115 3 0 0 0 1 0 2381 Other
## 175 63 1 23 120 3 0 0 0 0 0 2410 Other
## 176 65 1 30 142 1 1 1 0 0 0 2410 White
## 177 67 1 22 130 1 1 0 0 0 1 2410 White
## 178 68 1 17 120 1 1 0 0 0 3 2414 White
## 179 69 1 23 110 1 1 1 0 0 0 2424 White
## 180 71 1 17 120 2 0 0 0 0 2 2438 Black
## 181 75 1 26 154 3 0 1 1 0 1 2442 Other
## 182 76 1 20 105 3 0 0 0 0 3 2450 Other
## 183 77 1 26 190 1 1 0 0 0 0 2466 White
## 184 78 1 14 101 3 1 1 0 0 0 2466 Other
## 185 79 1 28 95 1 1 0 0 0 2 2466 White
## 186 81 1 14 100 3 0 0 0 0 2 2495 Other
## 187 82 1 23 94 3 1 0 0 0 0 2495 Other
## 188 83 1 17 142 2 0 0 1 0 0 2495 Black
## 189 84 1 21 130 1 1 0 1 0 3 2495 White
# Cross-tabulate the new ethnicity labels against the original race codes;
# every count should sit on the diagonal if the recoding is correct.
table(bw[["ethnicity"]], bw[["race"]])
##
## 1 2 3
## White 96 0 0
## Black 0 26 0
## Other 0 0 67
# Create bw1 holding only the id, low and bwt columns of bw.
bw1 <- bw %>%
  select(id, low, bwt)
# Number of observations (rows) and variables (columns) in bw1.
dim(bw1)
## [1] 189 3
###5.4 Tạo 1 tập dữ liệu bw3 chỉ gồm những thai phụ sinh con nhẹ cân (low = 1). Dữ liệu này có bao nhiêu biến số và quan sát?
# Keep only the rows of bw where the low indicator equals 1.
bw3 <- filter(bw, low == 1)
# Number of observations (rows) and variables (columns) in bw3.
dim(bw3)
## [1] 59 12
###5.5 Tạo 1 tập dữ liệu bw4 chỉ gồm những thai phụ sinh con nhẹ cân (low = 1) và có hút thuốc trong lúc mang thai (smoke = 1). Dữ liệu này có bao nhiêu biến số và quan sát?
# Keep only the rows with low equal to 1 and smoke equal to 1
# (conditions separated by a comma in filter() are combined with AND).
bw4 <- bw %>%
  filter(low %in% 1, smoke %in% 1)
# Cross-tabulate ethnicity against race within the subset.
table(bw4[["ethnicity"]], bw4[["race"]])
##
## 1 2 3
## White 19 0 0
## Black 0 6 0
## Other 0 0 5
# Number of observations (rows), then variables (columns) in bw4.
c(nrow(bw4), ncol(bw4))
## [1] 30 12
##Việc 6. Sử dụng gói lệnh lessR
###6.1 Vẽ biểu đồ phân bố histogram cân nặng của con (bwt)
library(lessR)
## Warning: package 'lessR' was built under R version 4.4.1
##
## lessR 4.4.2 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following objects are masked from 'package:dplyr':
##
## order_by, recode, rename
## The following object is masked from 'package:base':
##
## sort_by
# Histogram of birth weight (bwt) with steel-blue bars and explicit axis labels.
Histogram(
  bwt,
  data = bw,
  fill = "steelblue",
  xlab = "Birthweight (g)",
  ylab = "Frequency"
)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(bwt, density=TRUE) # smoothed curve + histogram
## Plot(bwt) # Violin/Box/Scatterplot (VBS) plot
##
## --- bwt ---
##
## n miss mean sd min mdn max
## 189 0 2944.59 729.21 709.00 2977.00 4990.00
##
##
##
## --- Outliers --- from the box plot: 1
##
## Small Large
## ----- -----
## 709.0
##
##
## Bin Width: 500
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -----------------------------------------------------
## 500 > 1000 750 1 0.01 1 0.01
## 1000 > 1500 1250 4 0.02 5 0.03
## 1500 > 2000 1750 14 0.07 19 0.10
## 2000 > 2500 2250 40 0.21 59 0.31
## 2500 > 3000 2750 38 0.20 97 0.51
## 3000 > 3500 3250 45 0.24 142 0.75
## 3500 > 4000 3750 38 0.20 180 0.95
## 4000 > 4500 4250 7 0.04 187 0.99
## 4500 > 5000 4750 2 0.01 189 1.00
##
###6.2 Vẽ biểu đồ thanh (bar chart) chủng tộc (ethnicity)
# Bar chart of the ethnicity factor: one bar per level.
BarChart(x = ethnicity, data = bw)
## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE) # horizontal bar chart
## BarChart(ethnicity, fill="reds") # red bars of varying lightness
## PieChart(ethnicity) # doughnut (ring) chart
## Plot(ethnicity) # bubble plot
## Plot(ethnicity, stat="count") # lollipop plot
##
## --- ethnicity ---
##
## Missing Values: 0
##
## White Black Other Total
## Frequencies: 96 26 67 189
## Proportions: 0.508 0.138 0.354 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 39.270, df = 2, p-value = 0.000
###6.3 Vẽ biểu đồ tương quan giữa cân nặng của mẹ (lwt) và cân nặng của con (bwt)
# Scatter plot of lwt (x axis) against bwt (y axis) with a fitted
# linear-model line (fit = "lm").
ScatterPlot(lwt, bwt, data = bw, fit = "lm")
##
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(lwt, bwt, enhance=TRUE) # many options
## Plot(lwt, bwt, fill="skyblue") # interior fill color of points
## Plot(lwt, bwt, MD_cut=6) # Mahalanobis distance from center > 6 is an outlier
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 189
## Sample Correlation of lwt and bwt: r = 0.186
##
## Hypothesis Test of 0 Correlation: t = 2.585, df = 187, p-value = 0.011
## 95% Confidence Interval for Correlation: 0.044 to 0.320
##
##
## Line: b0 = 2369.624 b1 = 4.429 Linear Model MSE = 516,155.173 Rsq = 0.034
##