## Warning: package 'readxl' was built under R version 4.5.1
## # A tibble: 6 × 13
## `Student ID` Age Gender Height Weight `Blood Type` BMI Temperature
## <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 18 Female 162. 72.4 O 27.6 NA
## 2 2 NA Male 152. 47.6 B NA 98.7
## 3 3 32 Female 183. 55.7 A 16.7 98.3
## 4 NA 30 Male 182. 63.3 B 19.1 98.8
## 5 5 23 Female NA 46.2 O NA 98.5
## 6 6 32 <NA> 151. 68.6 B 29.9 99.7
## # ℹ 5 more variables: `Heart Rate` <dbl>, `Blood Pressure` <dbl>,
## # Cholesterol <dbl>, Diabetes <chr>, Smoking <chr>
# Report the dimensions of the dataset (number of rows, number of columns).
dim(dataset)
## [1] 200000 13
# List the column names of the dataset.
names(dataset)
## [1] "Student ID" "Age" "Gender" "Height"
## [5] "Weight" "Blood Type" "BMI" "Temperature"
## [9] "Heart Rate" "Blood Pressure" "Cholesterol" "Diabetes"
## [13] "Smoking"
# Count the rows that are exact duplicates of an earlier row.
sum(duplicated(dataset))
## [1] 7644
# Count the missing (NA) values in each column.
colSums(is.na(dataset))
## Student ID Age Gender Height Weight
## 20000 20000 20000 20000 20000
## Blood Type BMI Temperature Heart Rate Blood Pressure
## 20000 20000 20000 20000 20000
## Cholesterol Diabetes Smoking
## 20000 20000 20000
# Data dictionary: one row per dataset column with a short description
# (descriptions are in Vietnamese, units in parentheses).
# NOTE(review): the labels below use dotted names (e.g. "Student.ID"), while
# names(dataset) reports the columns with spaces (e.g. "Student ID"); they are
# display labels only and cannot be used to index `dataset` directly.
variable_meaning <- data.frame(
  Variable = c(
    "Student.ID",
    "Age",
    "Gender",
    "Height",
    "Weight",
    "Blood.Type",
    "BMI",
    "Temperature",
    "Heart.Rate",
    "Blood.Pressure",
    "Cholesterol",
    "Diabetes",
    "Smoking"
  ),
  Meaning = c(
    "Mã số học sinh/bệnh nhân",
    "Tuổi (năm)",
    "Giới tính (Male/Female)",
    "Chiều cao (cm)",
    "Cân nặng (kg)",
    "Nhóm máu (A, B, AB, O)",
    "Chỉ số BMI (khối lượng cơ thể)",
    # The recorded temperatures are around 98-99, i.e. degrees Fahrenheit,
    # not Celsius.
    "Nhiệt độ cơ thể (°F)",
    "Nhịp tim (bpm)",
    "Huyết áp (mmHg)",
    "Cholesterol (mg/dL)",
    "Tình trạng tiểu đường (Yes/No hoặc 1/0)",
    "Tình trạng hút thuốc (Yes/No hoặc lịch sử hút thuốc)"
  ),
  # Keep both columns as character vectors on R versions older than 4.0.
  stringsAsFactors = FALSE
)
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.1
# Render the data dictionary as a formatted table in the report.
kable(variable_meaning, booktabs = TRUE)
| Variable | Meaning |
|---|---|
| Student.ID | Mã số học sinh/bệnh nhân |
| Age | Tuổi (năm) |
| Gender | Giới tính (Male/Female) |
| Height | Chiều cao (cm) |
| Weight | Cân nặng (kg) |
| Blood.Type | Nhóm máu (A, B, AB, O) |
| BMI | Chỉ số BMI (khối lượng cơ thể) |
| Temperature | Nhiệt độ cơ thể (°F) |
| Heart.Rate | Nhịp tim (bpm) |
| Blood.Pressure | Huyết áp (mmHg) |
| Cholesterol | Cholesterol (mg/dL) |
| Diabetes | Tình trạng tiểu đường (Yes/No hoặc 1/0) |
| Smoking | Tình trạng hút thuốc (Yes/No hoặc lịch sử hút thuốc) |
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose blood type is "O" and preview the first rows.
# The column name contains a space, so it must be quoted with backticks.
# A double-quoted "Blood Type" is a string literal: comparing it with "O" is
# always FALSE, which silently drops every row.
bloodtype_O_data <- dataset %>%
  dplyr::filter(`Blood Type` == "O")
head(bloodtype_O_data)
## # A tibble: 0 × 13
## # ℹ 13 variables: Student ID <dbl>, Age <dbl>, Gender <chr>, Height <dbl>,
## # Weight <dbl>, Blood Type <chr>, BMI <dbl>, Temperature <dbl>,
## # Heart Rate <dbl>, Blood Pressure <dbl>, Cholesterol <dbl>, Diabetes <chr>,
## # Smoking <chr>
# Install dplyr only when it is missing. Installing unconditionally while the
# package is already loaded does nothing except raise a warning, and it forces
# a download attempt on every render.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Warning: package 'dplyr' is in use and will not be installed
# Extract the records whose Gender is "Female" and preview the first rows.
female_data <- dplyr::filter(dataset, Gender == "Female")
head(female_data)
## # A tibble: 6 × 13
## `Student ID` Age Gender Height Weight `Blood Type` BMI Temperature
## <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1 18 Female 162. 72.4 O 27.6 NA
## 2 3 32 Female 183. 55.7 A 16.7 98.3
## 3 5 23 Female NA 46.2 O NA 98.5
## 4 11 28 Female 153. 73.6 B 31.4 98.4
## 5 12 34 Female 182. 76.4 AB 23.0 98.1
## 6 19 31 Female 159. 46.8 AB 18.6 98.8
## # ℹ 5 more variables: `Heart Rate` <dbl>, `Blood Pressure` <dbl>,
## # Cholesterol <dbl>, Diabetes <chr>, Smoking <chr>
# Descriptive statistics for Age, ignoring missing values: each named
# statistic is applied to the column in turn and collected into a named
# numeric vector.
age_stats <- vapply(
  list(
    Min = min,
    Max = max,
    Mean = mean,
    Median = median,
    SD = sd,
    Var = var
  ),
  function(stat) stat(dataset$Age, na.rm = TRUE),
  numeric(1)
)
age_stats
## Min Max Mean Median SD Var
## 18.000000 34.000000 26.021561 26.000000 4.890528 23.917262
# Descriptive statistics for BMI, ignoring missing values: each named
# statistic is applied to the column in turn and collected into a named
# numeric vector.
bmi_stats <- vapply(
  list(
    Min = min,
    Max = max,
    Mean = mean,
    Median = median,
    SD = sd,
    Var = var
  ),
  function(stat) stat(dataset$BMI, na.rm = TRUE),
  numeric(1)
)
bmi_stats
## Min Max Mean Median SD Var
## 10.074837 44.355113 23.338869 22.671401 7.033554 49.470878
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the cars data.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.