Sử dụng bộ dữ liệu trees và thực hiện các biến đổi sau:
Đổi tên cột biến Girth thành girth
# Attach dplyr first so that rename() and the pipe are available
library(dplyr)
# Bring the built-in trees data set into the workspace
data(trees)
# Lower-case the Girth column name; the other columns keep their names
trees <- rename(trees, girth = Girth)
# Preview the first rows
head(trees)
## girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
Tính trung bình & độ lệch chuẩn cho cột biến Volume
## [1] 30.17097
## [1] 16.43785
Tạo ratio = Volume/Height
## girth Height Volume ratio
## 1 8.3 70 10.3 0.1471429
## 2 8.6 65 10.3 0.1584615
## 3 8.8 63 10.2 0.1619048
## 4 10.5 72 16.4 0.2277778
## 5 10.7 81 18.8 0.2320988
## 6 10.8 83 19.7 0.2373494
Tạo girth2 Height2 Volume2 (squared variables)
# Method 1
# Spell out one squared column per original variable; the new columns are
# appended after the existing ones and the result is stored in trees_m1
trees_m1 <- trees %>%
  mutate(
    girth2 = girth^2,
    Height2 = Height^2,
    Volume2 = Volume^2
  )
head(trees_m1)
## girth Height Volume ratio girth2 Height2 Volume2
## 1 8.3 70 10.3 0.1471429 68.89 4900 106.09
## 2 8.6 65 10.3 0.1584615 73.96 4225 106.09
## 3 8.8 63 10.2 0.1619048 77.44 3969 104.04
## 4 10.5 72 16.4 0.2277778 110.25 5184 268.96
## 5 10.7 81 18.8 0.2320988 114.49 6561 353.44
## 6 10.8 83 19.7 0.2373494 116.64 6889 388.09
# Method 2
# Helper that squares its argument; works element-wise on numeric vectors
sqr <- function(x) {
  x^2
}
# Apply sqr() to every numeric column. The columns are overwritten in place
# (ratio included), so no girth2/Height2/Volume2 columns are added here.
trees_m2 <- mutate_if(trees, is.numeric, sqr)
head(trees_m2)
## girth Height Volume ratio
## 1 68.89 4900 106.09 0.02165102
## 2 73.96 4225 106.09 0.02511006
## 3 77.44 3969 104.04 0.02621315
## 4 110.25 5184 268.96 0.05188272
## 5 114.49 6561 353.44 0.05386984
## 6 116.64 6889 388.09 0.05633474
Tạo bộ dữ liệu chứa Top 3 highest volume and Top 3 least volume
# Filter top_3
# Three largest volumes. The weight must be Volume: weighting by Height would
# return the three tallest trees rather than the three with the most volume.
top3_high_volume <- trees %>% top_n(n = 3, wt = Volume)
# Three smallest volumes: negating the weight makes top_n() rank from the bottom
top3_low_volume <- trees %>% top_n(n = 3, wt = -Volume)
# Combine
top3 <- bind_rows(top3_high_volume, top3_low_volume)
top3
## girth Height Volume ratio
## 1 17.5 82 55.7 0.6792683
## 2 17.9 80 58.3 0.7287500
## 3 20.6 87 77.0 0.8850575
## 4 8.3 70 10.3 0.1471429
## 5 8.6 65 10.3 0.1584615
## 6 8.8 63 10.2 0.1619048
Tạo cột biến Category, Group 1 = (Volume >= 37), Group 2 = (19 <= Volume < 37), Group 3 = (Volume < 19)
# case_when() checks its conditions from top to bottom and uses the first one
# that holds, so the second rule only ever sees rows with Volume below 37
trees <- trees %>%
  mutate(
    category = case_when(
      Volume >= 37 ~ "Group 1",
      Volume >= 19 ~ "Group 2",
      TRUE ~ "Group 3"
    )
  )
head(trees)
## girth Height Volume ratio category
## 1 8.3 70 10.3 0.1471429 Group 3
## 2 8.6 65 10.3 0.1584615 Group 3
## 3 8.8 63 10.2 0.1619048 Group 3
## 4 10.5 72 16.4 0.2277778 Group 3
## 5 10.7 81 18.8 0.2320988 Group 3
## 6 10.8 83 19.7 0.2373494 Group 2
Sử dụng file dữ liệu kinhteluong
# Clear Workspace
# (removes the objects listed by ls(); packages that are attached stay attached)
rm(list = ls())
# Load library stringr
library(stringr)
# Address folder
# full.names = TRUE keeps the directory prefix on every returned entry.
# NOTE(review): the paths printed further down show "DATA SCIENCE" with a
# space, while this literal uses an underscore - confirm the folder name.
data_path <- list.files(
  path = "D:\\DATA_SCIENCE\\R_COURSE_CASED\\kinhteluong",
  full.names = TRUE
)
Có bao nhiêu files dữ liệu có cụm kí tự .dta?
# Specify filter condition
# fixed() makes str_detect() look for the literal text ".dta"; passed as a
# plain pattern it is a regular expression in which the dot matches any
# character, so a name such as "metadta" would also be counted.
condition <- str_detect(data_path, pattern = fixed(".dta"))
# Count -> 12
total_dta <- sum(condition)
total_dta
## [1] 12
Liệt kê đường dẫn đầy đủ của tất cả các files dữ liệu có cụm kí tự .dta
## [1] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/dung_stata13.dta"
## [2] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/intdef.dta"
## [3] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/nghiyeu.dta"
## [4] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Panel1.dta"
## [5] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/probit.dta"
## [6] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/psi.dta"
## [7] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table1_1.dta"
## [8] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table13_1.dta"
## [9] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table4_0.dta"
## [10] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table5_1.dta"
## [11] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table6_1.dta"
## [12] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table8_1.dta"