Khoá học sử dụng R - Ngày 1

Việc 3: Đọc dữ liệu birthwt.csv

# Read the CSV file "birthwt.csv" (path relative to the working directory)
# into the data frame `bw`.
bw <- read.csv(file = "birthwt.csv")

Việc 4: Thông tin về dữ liệu bw

4.1 Có bao nhiêu biến số (variable) và quan sát (observation)?

# Print the dimensions of bw: number of rows (observations) first, then
# number of columns (variables).
dim(bw)
## [1] 189  11

4.2 Liệt kê 6 quan sát đầu tiên của dữ liệu.

# Print the first 6 rows of bw.
head(bw, 6)
##   id low age lwt race smoke ptl ht ui ftv  bwt
## 1 85   0  19 182    2     0   0  0  1   0 2523
## 2 86   0  33 155    3     0   0  0  0   3 2551
## 3 87   0  20 105    1     1   0  0  0   1 2557
## 4 88   0  21 108    1     1   0  0  1   2 2594
## 5 89   0  18 107    1     1   0  0  1   0 2600
## 6 91   0  21 124    3     0   0  0  0   0 2622

Liệt kê 10 dòng cuối của dữ liệu.

# Print the last 10 rows of bw.
tail(bw, 10)
##     id low age lwt race smoke ptl ht ui ftv  bwt
## 180 71   1  17 120    2     0   0  0  0   2 2438
## 181 75   1  26 154    3     0   1  1  0   1 2442
## 182 76   1  20 105    3     0   0  0  0   3 2450
## 183 77   1  26 190    1     1   0  0  0   0 2466
## 184 78   1  14 101    3     1   1  0  0   0 2466
## 185 79   1  28  95    1     1   0  0  0   2 2466
## 186 81   1  14 100    3     0   0  0  0   2 2495
## 187 82   1  23  94    3     1   0  0  0   0 2495
## 188 83   1  17 142    2     0   0  1  0   0 2495
## 189 84   1  21 130    1     1   0  1  0   3 2495

Việc 5: Biên tập dữ liệu

5.1 Tạo biến số mới mwt là cân nặng của mẹ tính bằng kg

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Display bw with an extra column mwt = lwt * 0.453592, rounded to two
# decimals (presumably lwt is in pounds, making mwt kilograms -- confirm
# against the data dictionary).
# NOTE(review): the result is only printed, not assigned, so bw itself does
# not gain an mwt column. Assign it back (bw <- ...) if mwt is needed later.
mutate(bw, mwt = round(lwt * 0.453592, digits = 2))
##      id low age lwt race smoke ptl ht ui ftv  bwt    mwt
## 1    85   0  19 182    2     0   0  0  1   0 2523  82.55
## 2    86   0  33 155    3     0   0  0  0   3 2551  70.31
## 3    87   0  20 105    1     1   0  0  0   1 2557  47.63
## 4    88   0  21 108    1     1   0  0  1   2 2594  48.99
## 5    89   0  18 107    1     1   0  0  1   0 2600  48.53
## 6    91   0  21 124    3     0   0  0  0   0 2622  56.25
## 7    92   0  22 118    1     0   0  0  0   1 2637  53.52
## 8    93   0  17 103    3     0   0  0  0   1 2637  46.72
## 9    94   0  29 123    1     1   0  0  0   1 2663  55.79
## 10   95   0  26 113    1     1   0  0  0   0 2665  51.26
## 11   96   0  19  95    3     0   0  0  0   0 2722  43.09
## 12   97   0  19 150    3     0   0  0  0   1 2733  68.04
## 13   98   0  22  95    3     0   0  1  0   0 2751  43.09
## 14   99   0  30 107    3     0   1  0  1   2 2750  48.53
## 15  100   0  18 100    1     1   0  0  0   0 2769  45.36
## 16  101   0  18 100    1     1   0  0  0   0 2769  45.36
## 17  102   0  15  98    2     0   0  0  0   0 2778  44.45
## 18  103   0  25 118    1     1   0  0  0   3 2782  53.52
## 19  104   0  20 120    3     0   0  0  1   0 2807  54.43
## 20  105   0  28 120    1     1   0  0  0   1 2821  54.43
## 21  106   0  32 121    3     0   0  0  0   2 2835  54.88
## 22  107   0  31 100    1     0   0  0  1   3 2835  45.36
## 23  108   0  36 202    1     0   0  0  0   1 2836  91.63
## 24  109   0  28 120    3     0   0  0  0   0 2863  54.43
## 25  111   0  25 120    3     0   0  0  1   2 2877  54.43
## 26  112   0  28 167    1     0   0  0  0   0 2877  75.75
## 27  113   0  17 122    1     1   0  0  0   0 2906  55.34
## 28  114   0  29 150    1     0   0  0  0   2 2920  68.04
## 29  115   0  26 168    2     1   0  0  0   0 2920  76.20
## 30  116   0  17 113    2     0   0  0  0   1 2920  51.26
## 31  117   0  17 113    2     0   0  0  0   1 2920  51.26
## 32  118   0  24  90    1     1   1  0  0   1 2948  40.82
## 33  119   0  35 121    2     1   1  0  0   1 2948  54.88
## 34  120   0  25 155    1     0   0  0  0   1 2977  70.31
## 35  121   0  25 125    2     0   0  0  0   0 2977  56.70
## 36  123   0  29 140    1     1   0  0  0   2 2977  63.50
## 37  124   0  19 138    1     1   0  0  0   2 2977  62.60
## 38  125   0  27 124    1     1   0  0  0   0 2922  56.25
## 39  126   0  31 215    1     1   0  0  0   2 3005  97.52
## 40  127   0  33 109    1     1   0  0  0   1 3033  49.44
## 41  128   0  21 185    2     1   0  0  0   2 3042  83.91
## 42  129   0  19 189    1     0   0  0  0   2 3062  85.73
## 43  130   0  23 130    2     0   0  0  0   1 3062  58.97
## 44  131   0  21 160    1     0   0  0  0   0 3062  72.57
## 45  132   0  18  90    1     1   0  0  1   0 3062  40.82
## 46  133   0  18  90    1     1   0  0  1   0 3062  40.82
## 47  134   0  32 132    1     0   0  0  0   4 3080  59.87
## 48  135   0  19 132    3     0   0  0  0   0 3090  59.87
## 49  136   0  24 115    1     0   0  0  0   2 3090  52.16
## 50  137   0  22  85    3     1   0  0  0   0 3090  38.56
## 51  138   0  22 120    1     0   0  1  0   1 3100  54.43
## 52  139   0  23 128    3     0   0  0  0   0 3104  58.06
## 53  140   0  22 130    1     1   0  0  0   0 3132  58.97
## 54  141   0  30  95    1     1   0  0  0   2 3147  43.09
## 55  142   0  19 115    3     0   0  0  0   0 3175  52.16
## 56  143   0  16 110    3     0   0  0  0   0 3175  49.90
## 57  144   0  21 110    3     1   0  0  1   0 3203  49.90
## 58  145   0  30 153    3     0   0  0  0   0 3203  69.40
## 59  146   0  20 103    3     0   0  0  0   0 3203  46.72
## 60  147   0  17 119    3     0   0  0  0   0 3225  53.98
## 61  148   0  17 119    3     0   0  0  0   0 3225  53.98
## 62  149   0  23 119    3     0   0  0  0   2 3232  53.98
## 63  150   0  24 110    3     0   0  0  0   0 3232  49.90
## 64  151   0  28 140    1     0   0  0  0   0 3234  63.50
## 65  154   0  26 133    3     1   2  0  0   0 3260  60.33
## 66  155   0  20 169    3     0   1  0  1   1 3274  76.66
## 67  156   0  24 115    3     0   0  0  0   2 3274  52.16
## 68  159   0  28 250    3     1   0  0  0   6 3303 113.40
## 69  160   0  20 141    1     0   2  0  1   1 3317  63.96
## 70  161   0  22 158    2     0   1  0  0   2 3317  71.67
## 71  162   0  22 112    1     1   2  0  0   0 3317  50.80
## 72  163   0  31 150    3     1   0  0  0   2 3321  68.04
## 73  164   0  23 115    3     1   0  0  0   1 3331  52.16
## 74  166   0  16 112    2     0   0  0  0   0 3374  50.80
## 75  167   0  16 135    1     1   0  0  0   0 3374  61.23
## 76  168   0  18 229    2     0   0  0  0   0 3402 103.87
## 77  169   0  25 140    1     0   0  0  0   1 3416  63.50
## 78  170   0  32 134    1     1   1  0  0   4 3430  60.78
## 79  172   0  20 121    2     1   0  0  0   0 3444  54.88
## 80  173   0  23 190    1     0   0  0  0   0 3459  86.18
## 81  174   0  22 131    1     0   0  0  0   1 3460  59.42
## 82  175   0  32 170    1     0   0  0  0   0 3473  77.11
## 83  176   0  30 110    3     0   0  0  0   0 3544  49.90
## 84  177   0  20 127    3     0   0  0  0   0 3487  57.61
## 85  179   0  23 123    3     0   0  0  0   0 3544  55.79
## 86  180   0  17 120    3     1   0  0  0   0 3572  54.43
## 87  181   0  19 105    3     0   0  0  0   0 3572  47.63
## 88  182   0  23 130    1     0   0  0  0   0 3586  58.97
## 89  183   0  36 175    1     0   0  0  0   0 3600  79.38
## 90  184   0  22 125    1     0   0  0  0   1 3614  56.70
## 91  185   0  24 133    1     0   0  0  0   0 3614  60.33
## 92  186   0  21 134    3     0   0  0  0   2 3629  60.78
## 93  187   0  19 235    1     1   0  1  0   0 3629 106.59
## 94  188   0  25  95    1     1   3  0  1   0 3637  43.09
## 95  189   0  16 135    1     1   0  0  0   0 3643  61.23
## 96  190   0  29 135    1     0   0  0  0   1 3651  61.23
## 97  191   0  29 154    1     0   0  0  0   1 3651  69.85
## 98  192   0  19 147    1     1   0  0  0   0 3651  66.68
## 99  193   0  19 147    1     1   0  0  0   0 3651  66.68
## 100 195   0  30 137    1     0   0  0  0   1 3699  62.14
## 101 196   0  24 110    1     0   0  0  0   1 3728  49.90
## 102 197   0  19 184    1     1   0  1  0   0 3756  83.46
## 103 199   0  24 110    3     0   1  0  0   0 3770  49.90
## 104 200   0  23 110    1     0   0  0  0   1 3770  49.90
## 105 201   0  20 120    3     0   0  0  0   0 3770  54.43
## 106 202   0  25 241    2     0   0  1  0   0 3790 109.32
## 107 203   0  30 112    1     0   0  0  0   1 3799  50.80
## 108 204   0  22 169    1     0   0  0  0   0 3827  76.66
## 109 205   0  18 120    1     1   0  0  0   2 3856  54.43
## 110 206   0  16 170    2     0   0  0  0   4 3860  77.11
## 111 207   0  32 186    1     0   0  0  0   2 3860  84.37
## 112 208   0  18 120    3     0   0  0  0   1 3884  54.43
## 113 209   0  29 130    1     1   0  0  0   2 3884  58.97
## 114 210   0  33 117    1     0   0  0  1   1 3912  53.07
## 115 211   0  20 170    1     1   0  0  0   0 3940  77.11
## 116 212   0  28 134    3     0   0  0  0   1 3941  60.78
## 117 213   0  14 135    1     0   0  0  0   0 3941  61.23
## 118 214   0  28 130    3     0   0  0  0   0 3969  58.97
## 119 215   0  25 120    1     0   0  0  0   2 3983  54.43
## 120 216   0  16  95    3     0   0  0  0   1 3997  43.09
## 121 217   0  20 158    1     0   0  0  0   1 3997  71.67
## 122 218   0  26 160    3     0   0  0  0   0 4054  72.57
## 123 219   0  21 115    1     0   0  0  0   1 4054  52.16
## 124 220   0  22 129    1     0   0  0  0   0 4111  58.51
## 125 221   0  25 130    1     0   0  0  0   2 4153  58.97
## 126 222   0  31 120    1     0   0  0  0   2 4167  54.43
## 127 223   0  35 170    1     0   1  0  0   1 4174  77.11
## 128 224   0  19 120    1     1   0  0  0   0 4238  54.43
## 129 225   0  24 116    1     0   0  0  0   1 4593  52.62
## 130 226   0  45 123    1     0   0  0  0   1 4990  55.79
## 131   4   1  28 120    3     1   1  0  1   0  709  54.43
## 132  10   1  29 130    1     0   0  0  1   2 1021  58.97
## 133  11   1  34 187    2     1   0  1  0   0 1135  84.82
## 134  13   1  25 105    3     0   1  1  0   0 1330  47.63
## 135  15   1  25  85    3     0   0  0  1   0 1474  38.56
## 136  16   1  27 150    3     0   0  0  0   0 1588  68.04
## 137  17   1  23  97    3     0   0  0  1   1 1588  44.00
## 138  18   1  24 128    2     0   1  0  0   1 1701  58.06
## 139  19   1  24 132    3     0   0  1  0   0 1729  59.87
## 140  20   1  21 165    1     1   0  1  0   1 1790  74.84
## 141  22   1  32 105    1     1   0  0  0   0 1818  47.63
## 142  23   1  19  91    1     1   2  0  1   0 1885  41.28
## 143  24   1  25 115    3     0   0  0  0   0 1893  52.16
## 144  25   1  16 130    3     0   0  0  0   1 1899  58.97
## 145  26   1  25  92    1     1   0  0  0   0 1928  41.73
## 146  27   1  20 150    1     1   0  0  0   2 1928  68.04
## 147  28   1  21 200    2     0   0  0  1   2 1928  90.72
## 148  29   1  24 155    1     1   1  0  0   0 1936  70.31
## 149  30   1  21 103    3     0   0  0  0   0 1970  46.72
## 150  31   1  20 125    3     0   0  0  1   0 2055  56.70
## 151  32   1  25  89    3     0   2  0  0   1 2055  40.37
## 152  33   1  19 102    1     0   0  0  0   2 2082  46.27
## 153  34   1  19 112    1     1   0  0  1   0 2084  50.80
## 154  35   1  26 117    1     1   1  0  0   0 2084  53.07
## 155  36   1  24 138    1     0   0  0  0   0 2100  62.60
## 156  37   1  17 130    3     1   1  0  1   0 2125  58.97
## 157  40   1  20 120    2     1   0  0  0   3 2126  54.43
## 158  42   1  22 130    1     1   1  0  1   1 2187  58.97
## 159  43   1  27 130    2     0   0  0  1   0 2187  58.97
## 160  44   1  20  80    3     1   0  0  1   0 2211  36.29
## 161  45   1  17 110    1     1   0  0  0   0 2225  49.90
## 162  46   1  25 105    3     0   1  0  0   1 2240  47.63
## 163  47   1  20 109    3     0   0  0  0   0 2240  49.44
## 164  49   1  18 148    3     0   0  0  0   0 2282  67.13
## 165  50   1  18 110    2     1   1  0  0   0 2296  49.90
## 166  51   1  20 121    1     1   1  0  1   0 2296  54.88
## 167  52   1  21 100    3     0   1  0  0   4 2301  45.36
## 168  54   1  26  96    3     0   0  0  0   0 2325  43.54
## 169  56   1  31 102    1     1   1  0  0   1 2353  46.27
## 170  57   1  15 110    1     0   0  0  0   0 2353  49.90
## 171  59   1  23 187    2     1   0  0  0   1 2367  84.82
## 172  60   1  20 122    2     1   0  0  0   0 2381  55.34
## 173  61   1  24 105    2     1   0  0  0   0 2381  47.63
## 174  62   1  15 115    3     0   0  0  1   0 2381  52.16
## 175  63   1  23 120    3     0   0  0  0   0 2410  54.43
## 176  65   1  30 142    1     1   1  0  0   0 2410  64.41
## 177  67   1  22 130    1     1   0  0  0   1 2410  58.97
## 178  68   1  17 120    1     1   0  0  0   3 2414  54.43
## 179  69   1  23 110    1     1   1  0  0   0 2424  49.90
## 180  71   1  17 120    2     0   0  0  0   2 2438  54.43
## 181  75   1  26 154    3     0   1  1  0   1 2442  69.85
## 182  76   1  20 105    3     0   0  0  0   3 2450  47.63
## 183  77   1  26 190    1     1   0  0  0   0 2466  86.18
## 184  78   1  14 101    3     1   1  0  0   0 2466  45.81
## 185  79   1  28  95    1     1   0  0  0   2 2466  43.09
## 186  81   1  14 100    3     0   0  0  0   2 2495  45.36
## 187  82   1  23  94    3     1   0  0  0   0 2495  42.64
## 188  83   1  17 142    2     0   0  1  0   0 2495  64.41
## 189  84   1  21 130    1     1   0  1  0   3 2495  58.97

5.2 Tạo biến số mới ethnicity là biến factor với điều kiện sau: nếu race = 1 thì ethnicity = “White”; nếu race = 2 thì ethnicity = “Black”; nếu race = 3 thì ethnicity = “Other”.

# Store a new factor column `ethnicity` in bw, built from `race` with the
# mapping 1 -> "White", 2 -> "Black", 3 -> "Other" (levels in that order).
bw <- mutate(
  bw,
  ethnicity = recode_factor(race, "1" = "White", "2" = "Black", "3" = "Other")
)

# Same labelling done with base factor(). This result is printed only and is
# not assigned, so it does not change bw.
mutate(
  bw,
  ethnicity = factor(
    race,
    levels = c(1, 2, 3),
    labels = c("White", "Black", "Other")
  )
)
##      id low age lwt race smoke ptl ht ui ftv  bwt ethnicity
## 1    85   0  19 182    2     0   0  0  1   0 2523     Black
## 2    86   0  33 155    3     0   0  0  0   3 2551     Other
## 3    87   0  20 105    1     1   0  0  0   1 2557     White
## 4    88   0  21 108    1     1   0  0  1   2 2594     White
## 5    89   0  18 107    1     1   0  0  1   0 2600     White
## 6    91   0  21 124    3     0   0  0  0   0 2622     Other
## 7    92   0  22 118    1     0   0  0  0   1 2637     White
## 8    93   0  17 103    3     0   0  0  0   1 2637     Other
## 9    94   0  29 123    1     1   0  0  0   1 2663     White
## 10   95   0  26 113    1     1   0  0  0   0 2665     White
## 11   96   0  19  95    3     0   0  0  0   0 2722     Other
## 12   97   0  19 150    3     0   0  0  0   1 2733     Other
## 13   98   0  22  95    3     0   0  1  0   0 2751     Other
## 14   99   0  30 107    3     0   1  0  1   2 2750     Other
## 15  100   0  18 100    1     1   0  0  0   0 2769     White
## 16  101   0  18 100    1     1   0  0  0   0 2769     White
## 17  102   0  15  98    2     0   0  0  0   0 2778     Black
## 18  103   0  25 118    1     1   0  0  0   3 2782     White
## 19  104   0  20 120    3     0   0  0  1   0 2807     Other
## 20  105   0  28 120    1     1   0  0  0   1 2821     White
## 21  106   0  32 121    3     0   0  0  0   2 2835     Other
## 22  107   0  31 100    1     0   0  0  1   3 2835     White
## 23  108   0  36 202    1     0   0  0  0   1 2836     White
## 24  109   0  28 120    3     0   0  0  0   0 2863     Other
## 25  111   0  25 120    3     0   0  0  1   2 2877     Other
## 26  112   0  28 167    1     0   0  0  0   0 2877     White
## 27  113   0  17 122    1     1   0  0  0   0 2906     White
## 28  114   0  29 150    1     0   0  0  0   2 2920     White
## 29  115   0  26 168    2     1   0  0  0   0 2920     Black
## 30  116   0  17 113    2     0   0  0  0   1 2920     Black
## 31  117   0  17 113    2     0   0  0  0   1 2920     Black
## 32  118   0  24  90    1     1   1  0  0   1 2948     White
## 33  119   0  35 121    2     1   1  0  0   1 2948     Black
## 34  120   0  25 155    1     0   0  0  0   1 2977     White
## 35  121   0  25 125    2     0   0  0  0   0 2977     Black
## 36  123   0  29 140    1     1   0  0  0   2 2977     White
## 37  124   0  19 138    1     1   0  0  0   2 2977     White
## 38  125   0  27 124    1     1   0  0  0   0 2922     White
## 39  126   0  31 215    1     1   0  0  0   2 3005     White
## 40  127   0  33 109    1     1   0  0  0   1 3033     White
## 41  128   0  21 185    2     1   0  0  0   2 3042     Black
## 42  129   0  19 189    1     0   0  0  0   2 3062     White
## 43  130   0  23 130    2     0   0  0  0   1 3062     Black
## 44  131   0  21 160    1     0   0  0  0   0 3062     White
## 45  132   0  18  90    1     1   0  0  1   0 3062     White
## 46  133   0  18  90    1     1   0  0  1   0 3062     White
## 47  134   0  32 132    1     0   0  0  0   4 3080     White
## 48  135   0  19 132    3     0   0  0  0   0 3090     Other
## 49  136   0  24 115    1     0   0  0  0   2 3090     White
## 50  137   0  22  85    3     1   0  0  0   0 3090     Other
## 51  138   0  22 120    1     0   0  1  0   1 3100     White
## 52  139   0  23 128    3     0   0  0  0   0 3104     Other
## 53  140   0  22 130    1     1   0  0  0   0 3132     White
## 54  141   0  30  95    1     1   0  0  0   2 3147     White
## 55  142   0  19 115    3     0   0  0  0   0 3175     Other
## 56  143   0  16 110    3     0   0  0  0   0 3175     Other
## 57  144   0  21 110    3     1   0  0  1   0 3203     Other
## 58  145   0  30 153    3     0   0  0  0   0 3203     Other
## 59  146   0  20 103    3     0   0  0  0   0 3203     Other
## 60  147   0  17 119    3     0   0  0  0   0 3225     Other
## 61  148   0  17 119    3     0   0  0  0   0 3225     Other
## 62  149   0  23 119    3     0   0  0  0   2 3232     Other
## 63  150   0  24 110    3     0   0  0  0   0 3232     Other
## 64  151   0  28 140    1     0   0  0  0   0 3234     White
## 65  154   0  26 133    3     1   2  0  0   0 3260     Other
## 66  155   0  20 169    3     0   1  0  1   1 3274     Other
## 67  156   0  24 115    3     0   0  0  0   2 3274     Other
## 68  159   0  28 250    3     1   0  0  0   6 3303     Other
## 69  160   0  20 141    1     0   2  0  1   1 3317     White
## 70  161   0  22 158    2     0   1  0  0   2 3317     Black
## 71  162   0  22 112    1     1   2  0  0   0 3317     White
## 72  163   0  31 150    3     1   0  0  0   2 3321     Other
## 73  164   0  23 115    3     1   0  0  0   1 3331     Other
## 74  166   0  16 112    2     0   0  0  0   0 3374     Black
## 75  167   0  16 135    1     1   0  0  0   0 3374     White
## 76  168   0  18 229    2     0   0  0  0   0 3402     Black
## 77  169   0  25 140    1     0   0  0  0   1 3416     White
## 78  170   0  32 134    1     1   1  0  0   4 3430     White
## 79  172   0  20 121    2     1   0  0  0   0 3444     Black
## 80  173   0  23 190    1     0   0  0  0   0 3459     White
## 81  174   0  22 131    1     0   0  0  0   1 3460     White
## 82  175   0  32 170    1     0   0  0  0   0 3473     White
## 83  176   0  30 110    3     0   0  0  0   0 3544     Other
## 84  177   0  20 127    3     0   0  0  0   0 3487     Other
## 85  179   0  23 123    3     0   0  0  0   0 3544     Other
## 86  180   0  17 120    3     1   0  0  0   0 3572     Other
## 87  181   0  19 105    3     0   0  0  0   0 3572     Other
## 88  182   0  23 130    1     0   0  0  0   0 3586     White
## 89  183   0  36 175    1     0   0  0  0   0 3600     White
## 90  184   0  22 125    1     0   0  0  0   1 3614     White
## 91  185   0  24 133    1     0   0  0  0   0 3614     White
## 92  186   0  21 134    3     0   0  0  0   2 3629     Other
## 93  187   0  19 235    1     1   0  1  0   0 3629     White
## 94  188   0  25  95    1     1   3  0  1   0 3637     White
## 95  189   0  16 135    1     1   0  0  0   0 3643     White
## 96  190   0  29 135    1     0   0  0  0   1 3651     White
## 97  191   0  29 154    1     0   0  0  0   1 3651     White
## 98  192   0  19 147    1     1   0  0  0   0 3651     White
## 99  193   0  19 147    1     1   0  0  0   0 3651     White
## 100 195   0  30 137    1     0   0  0  0   1 3699     White
## 101 196   0  24 110    1     0   0  0  0   1 3728     White
## 102 197   0  19 184    1     1   0  1  0   0 3756     White
## 103 199   0  24 110    3     0   1  0  0   0 3770     Other
## 104 200   0  23 110    1     0   0  0  0   1 3770     White
## 105 201   0  20 120    3     0   0  0  0   0 3770     Other
## 106 202   0  25 241    2     0   0  1  0   0 3790     Black
## 107 203   0  30 112    1     0   0  0  0   1 3799     White
## 108 204   0  22 169    1     0   0  0  0   0 3827     White
## 109 205   0  18 120    1     1   0  0  0   2 3856     White
## 110 206   0  16 170    2     0   0  0  0   4 3860     Black
## 111 207   0  32 186    1     0   0  0  0   2 3860     White
## 112 208   0  18 120    3     0   0  0  0   1 3884     Other
## 113 209   0  29 130    1     1   0  0  0   2 3884     White
## 114 210   0  33 117    1     0   0  0  1   1 3912     White
## 115 211   0  20 170    1     1   0  0  0   0 3940     White
## 116 212   0  28 134    3     0   0  0  0   1 3941     Other
## 117 213   0  14 135    1     0   0  0  0   0 3941     White
## 118 214   0  28 130    3     0   0  0  0   0 3969     Other
## 119 215   0  25 120    1     0   0  0  0   2 3983     White
## 120 216   0  16  95    3     0   0  0  0   1 3997     Other
## 121 217   0  20 158    1     0   0  0  0   1 3997     White
## 122 218   0  26 160    3     0   0  0  0   0 4054     Other
## 123 219   0  21 115    1     0   0  0  0   1 4054     White
## 124 220   0  22 129    1     0   0  0  0   0 4111     White
## 125 221   0  25 130    1     0   0  0  0   2 4153     White
## 126 222   0  31 120    1     0   0  0  0   2 4167     White
## 127 223   0  35 170    1     0   1  0  0   1 4174     White
## 128 224   0  19 120    1     1   0  0  0   0 4238     White
## 129 225   0  24 116    1     0   0  0  0   1 4593     White
## 130 226   0  45 123    1     0   0  0  0   1 4990     White
## 131   4   1  28 120    3     1   1  0  1   0  709     Other
## 132  10   1  29 130    1     0   0  0  1   2 1021     White
## 133  11   1  34 187    2     1   0  1  0   0 1135     Black
## 134  13   1  25 105    3     0   1  1  0   0 1330     Other
## 135  15   1  25  85    3     0   0  0  1   0 1474     Other
## 136  16   1  27 150    3     0   0  0  0   0 1588     Other
## 137  17   1  23  97    3     0   0  0  1   1 1588     Other
## 138  18   1  24 128    2     0   1  0  0   1 1701     Black
## 139  19   1  24 132    3     0   0  1  0   0 1729     Other
## 140  20   1  21 165    1     1   0  1  0   1 1790     White
## 141  22   1  32 105    1     1   0  0  0   0 1818     White
## 142  23   1  19  91    1     1   2  0  1   0 1885     White
## 143  24   1  25 115    3     0   0  0  0   0 1893     Other
## 144  25   1  16 130    3     0   0  0  0   1 1899     Other
## 145  26   1  25  92    1     1   0  0  0   0 1928     White
## 146  27   1  20 150    1     1   0  0  0   2 1928     White
## 147  28   1  21 200    2     0   0  0  1   2 1928     Black
## 148  29   1  24 155    1     1   1  0  0   0 1936     White
## 149  30   1  21 103    3     0   0  0  0   0 1970     Other
## 150  31   1  20 125    3     0   0  0  1   0 2055     Other
## 151  32   1  25  89    3     0   2  0  0   1 2055     Other
## 152  33   1  19 102    1     0   0  0  0   2 2082     White
## 153  34   1  19 112    1     1   0  0  1   0 2084     White
## 154  35   1  26 117    1     1   1  0  0   0 2084     White
## 155  36   1  24 138    1     0   0  0  0   0 2100     White
## 156  37   1  17 130    3     1   1  0  1   0 2125     Other
## 157  40   1  20 120    2     1   0  0  0   3 2126     Black
## 158  42   1  22 130    1     1   1  0  1   1 2187     White
## 159  43   1  27 130    2     0   0  0  1   0 2187     Black
## 160  44   1  20  80    3     1   0  0  1   0 2211     Other
## 161  45   1  17 110    1     1   0  0  0   0 2225     White
## 162  46   1  25 105    3     0   1  0  0   1 2240     Other
## 163  47   1  20 109    3     0   0  0  0   0 2240     Other
## 164  49   1  18 148    3     0   0  0  0   0 2282     Other
## 165  50   1  18 110    2     1   1  0  0   0 2296     Black
## 166  51   1  20 121    1     1   1  0  1   0 2296     White
## 167  52   1  21 100    3     0   1  0  0   4 2301     Other
## 168  54   1  26  96    3     0   0  0  0   0 2325     Other
## 169  56   1  31 102    1     1   1  0  0   1 2353     White
## 170  57   1  15 110    1     0   0  0  0   0 2353     White
## 171  59   1  23 187    2     1   0  0  0   1 2367     Black
## 172  60   1  20 122    2     1   0  0  0   0 2381     Black
## 173  61   1  24 105    2     1   0  0  0   0 2381     Black
## 174  62   1  15 115    3     0   0  0  1   0 2381     Other
## 175  63   1  23 120    3     0   0  0  0   0 2410     Other
## 176  65   1  30 142    1     1   1  0  0   0 2410     White
## 177  67   1  22 130    1     1   0  0  0   1 2410     White
## 178  68   1  17 120    1     1   0  0  0   3 2414     White
## 179  69   1  23 110    1     1   1  0  0   0 2424     White
## 180  71   1  17 120    2     0   0  0  0   2 2438     Black
## 181  75   1  26 154    3     0   1  1  0   1 2442     Other
## 182  76   1  20 105    3     0   0  0  0   3 2450     Other
## 183  77   1  26 190    1     1   0  0  0   0 2466     White
## 184  78   1  14 101    3     1   1  0  0   0 2466     Other
## 185  79   1  28  95    1     1   0  0  0   2 2466     White
## 186  81   1  14 100    3     0   0  0  0   2 2495     Other
## 187  82   1  23  94    3     1   0  0  0   0 2495     Other
## 188  83   1  17 142    2     0   0  1  0   0 2495     Black
## 189  84   1  21 130    1     1   0  1  0   3 2495     White
# Cross-tabulate ethnicity (rows) against race (columns) to check that each
# race code maps to exactly one ethnicity label.
table(bw$ethnicity, bw$race)
##        
##          1  2  3
##   White 96  0  0
##   Black  0 26  0
##   Other  0  0 67

5.3 Tạo 1 tập dữ liệu bw1 chỉ gồm 3 biến số id, low và bwt. Dữ liệu này có bao nhiêu biến số và quan sát?

# Keep only the columns id, low and bwt.
bw1 <- select(bw, id, low, bwt)

Số lượng biến số và quan sát trong dữ liệu bw1

# Print the number of rows (observations) and columns (variables) in bw1.
dim(bw1)
## [1] 189   3

5.4 Tạo 1 tập dữ liệu bw3 chỉ gồm những thai phụ sinh con có cân nặng thấp (low = 1). Dữ liệu này có bao nhiêu biến số và quan sát?

# Subset bw to the rows where low equals 1.
bw3 <- filter(bw, low == 1)

# Number of observations (rows) and variables (columns)
dim(bw3)
## [1] 59 12

5.5 Tạo 1 tập dữ liệu bw4 chỉ gồm những thai phụ sinh con có cân nặng thấp (low = 1) và có hút thuốc trong lúc mang thai (smoke = 1). Dữ liệu này có bao nhiêu biến số và quan sát?

# Keep only the rows where both low and smoke equal 1
# (comma-separated conditions in filter() are combined with AND).
bw4 <- filter(bw, low %in% 1, smoke %in% 1)

# Cross-tabulate ethnicity against race within the subset.
table(bw4$ethnicity, bw4$race)
##        
##          1  2  3
##   White 19  0  0
##   Black  0  6  0
##   Other  0  0  5
# Print the number of rows (observations) and columns (variables) in bw4.
dim(bw4)
## [1] 30 12

Việc 6: Sử dụng gói lệnh lessR

6.1 Vẽ biểu đồ phân bố (histogram) cân nặng của con (bwt)

library(lessR)
## Warning: package 'lessR' was built under R version 4.4.1
## 
## lessR 4.4.2                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following objects are masked from 'package:dplyr':
## 
##     order_by, recode, rename
## The following object is masked from 'package:base':
## 
##     sort_by
# Histogram of bwt from bw (lessR), with steel-blue bars and custom axis
# labels.
Histogram(
  bwt,
  data = bw,
  fill = "steelblue",
  xlab = "Birthweight (g)",
  ylab = "Frequency"
)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(bwt, density=TRUE)  # smoothed curve + histogram 
## Plot(bwt)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- bwt --- 
##  
##       n   miss       mean         sd        min        mdn        max 
##      189      0    2944.59     729.21     709.00    2977.00    4990.00 
##  
## 
##   
## --- Outliers ---     from the box plot: 1 
##  
## Small        Large 
## -----        ----- 
##  709.0            
## 
## 
## Bin Width: 500 
## Number of Bins: 9 
##  
##          Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ----------------------------------------------------- 
##   500 > 1000     750      1    0.01        1     0.01 
##  1000 > 1500    1250      4    0.02        5     0.03 
##  1500 > 2000    1750     14    0.07       19     0.10 
##  2000 > 2500    2250     40    0.21       59     0.31 
##  2500 > 3000    2750     38    0.20       97     0.51 
##  3000 > 3500    3250     45    0.24      142     0.75 
##  3500 > 4000    3750     38    0.20      180     0.95 
##  4000 > 4500    4250      7    0.04      187     0.99 
##  4500 > 5000    4750      2    0.01      189     1.00 
## 

6.2 Vẽ biểu đồ thanh (bar chart) chủng tộc (ethnicity)

# Bar chart of the ethnicity factor in bw (lessR). The call also prints a
# frequency table and a chi-squared test of equal probabilities.
BarChart(ethnicity, data = bw)

## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE)  # horizontal bar chart
## BarChart(ethnicity, fill="reds")  # red bars of varying lightness
## PieChart(ethnicity)  # doughnut (ring) chart
## Plot(ethnicity)  # bubble plot
## Plot(ethnicity, stat="count")  # lollipop plot 
## 
## --- ethnicity --- 
## 
## Missing Values: 0 
## 
##                White  Black  Other     Total 
## Frequencies:      96     26     67       189 
## Proportions:   0.508  0.138  0.354     1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 39.270, df = 2, p-value = 0.000

6.3 Vẽ biểu đồ tương quan giữa cân nặng của mẹ (lwt) và cân nặng của con (bwt)

# Scatter plot of bwt (y) against lwt (x) from bw, with a fitted linear
# regression line (fit = "lm"). The call also prints the Pearson correlation.
ScatterPlot(lwt, bwt, data = bw, fit = "lm")

## 
## 
## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(lwt, bwt, enhance=TRUE)  # many options
## Plot(lwt, bwt, fill="skyblue")  # interior fill color of points
## Plot(lwt, bwt, MD_cut=6)  # Mahalanobis distance from center > 6 is an outlier 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 189 
## Sample Correlation of lwt and bwt: r = 0.186 
##   
## Hypothesis Test of 0 Correlation:  t = 2.585,  df = 187,  p-value = 0.011 
## 95% Confidence Interval for Correlation:  0.044 to 0.320 
##   
## 
##   Line: b0 = 2369.624    b1 = 4.429    Linear Model MSE = 516,155.173   Rsq = 0.034
##