# Clear Workspace
# Removes every object that ls() lists in the current environment, so the
# exercise does not pick up variables left over from earlier work.
# NOTE(review): wiping the workspace from inside a script is usually
# discouraged; consider running the script in a fresh R session instead.
rm(list = ls())

Exercise 1

Su dung bo du lieu trees va thuc hien cac bien doi sau:

1.a

Doi ten cot bien Girth thanh girth

# Attach dplyr, which provides rename() and the other verbs used below
library(dplyr)

# Load the built-in trees data set into the workspace
data(trees)

# Rename column Girth to girth; the other columns are left untouched
trees <- rename(trees, girth = Girth)

# Preview the first six rows
head(trees)
##   girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7

1.b

Tinh trung binh & do lech chuan cho cot bien Volume

# Mean and standard deviation of the Volume column
m_volume <- mean(trees[["Volume"]])
sd_volume <- sd(trees[["Volume"]])

# Show both statistics
m_volume
## [1] 30.17097
sd_volume
## [1] 16.43785

1.c

Tao ratio = Volume/Height

# Add ratio = Volume / Height as a new column and keep the result in trees
trees <- mutate(trees, ratio = Volume / Height)

# Preview the first six rows, now including ratio
head(trees)
##   girth Height Volume     ratio
## 1   8.3     70   10.3 0.1471429
## 2   8.6     65   10.3 0.1584615
## 3   8.8     63   10.2 0.1619048
## 4  10.5     72   16.4 0.2277778
## 5  10.7     81   18.8 0.2320988
## 6  10.8     83   19.7 0.2373494

1.d

Tao girth2 Height2 Volume2 (squared variables)

# Method 1
# Spell out each squared column by hand; trees itself is not modified, the
# extended copy is stored in trees_m1.
trees_m1 <- mutate(
  trees,
  girth2 = girth^2,
  Height2 = Height^2,
  Volume2 = Volume^2
)

# Preview the first six rows with the three new columns
head(trees_m1)
##   girth Height Volume     ratio girth2 Height2 Volume2
## 1   8.3     70   10.3 0.1471429  68.89    4900  106.09
## 2   8.6     65   10.3 0.1584615  73.96    4225  106.09
## 3   8.8     63   10.2 0.1619048  77.44    3969  104.04
## 4  10.5     72   16.4 0.2277778 110.25    5184  268.96
## 5  10.7     81   18.8 0.2320988 114.49    6561  353.44
## 6  10.8     83   19.7 0.2373494 116.64    6889  388.09
# Method 2
# Helper: square every element of x.
#   x       a numeric vector
#   returns x raised to the power of 2, element by element
sqr <- function(x) {
  x^2
}

# Add girth2, Height2 and Volume2 by applying sqr() to the three measurement
# columns. across() with .names writes the squares to new columns, so the
# original columns and ratio stay unchanged and the result matches Method 1.
# (mutate_if(is.numeric, sqr) would overwrite every numeric column in place,
# ratio included, and create no new columns.)
trees_m2 <- trees %>%
  mutate(across(c(girth, Height, Volume), sqr, .names = "{.col}2"))

# Preview the first six rows with the three new columns
head(trees_m2)
##   girth Height Volume     ratio girth2 Height2 Volume2
## 1   8.3     70   10.3 0.1471429  68.89    4900  106.09
## 2   8.6     65   10.3 0.1584615  73.96    4225  106.09
## 3   8.8     63   10.2 0.1619048  77.44    3969  104.04
## 4  10.5     72   16.4 0.2277778 110.25    5184  268.96
## 5  10.7     81   18.8 0.2320988 114.49    6561  353.44
## 6  10.8     83   19.7 0.2373494 116.64    6889  388.09

1.e

Tao bo du lieu chua Top 3 Volume cao nhat va Top 3 Volume thap nhat

# Filter the three highest and the three lowest rows by Volume.
# Both filters rank by Volume: the exercise asks for the extremes of Volume,
# not of Height. top_n() keeps the rows in their original order and keeps
# ties; a negated weight selects the smallest values.
top3_highest <- trees %>% top_n(n = 3, wt = Volume)

top3_lowest <- trees %>% top_n(n = 3, wt = -Volume)

# Combine: highest-volume rows first, lowest-volume rows after
top3 <- bind_rows(top3_highest, top3_lowest)

top3
##   girth Height Volume     ratio
## 1  17.5     82   55.7 0.6792683
## 2  17.9     80   58.3 0.7287500
## 3  20.6     87   77.0 0.8850575
## 4   8.3     70   10.3 0.1471429
## 5   8.6     65   10.3 0.1584615
## 6   8.8     63   10.2 0.1619048

1.f

Tao cot bien category: Group 1 = (Volume >= 37), Group 2 = (19 <= Volume < 37), Group 3 = (Volume < 19)

# Label every tree with a volume group.
# case_when() uses the first condition that is TRUE for a row, so the second
# rule only ever sees rows with Volume below 37 and needs no upper bound.
trees <- mutate(
  trees,
  category = case_when(
    Volume >= 37 ~ "Group 1",
    Volume >= 19 ~ "Group 2",
    TRUE ~ "Group 3"
  )
)

# Preview the first six rows with the new category column
head(trees)
##   girth Height Volume     ratio category
## 1   8.3     70   10.3 0.1471429  Group 3
## 2   8.6     65   10.3 0.1584615  Group 3
## 3   8.8     63   10.2 0.1619048  Group 3
## 4  10.5     72   16.4 0.2277778  Group 3
## 5  10.7     81   18.8 0.2320988  Group 3
## 6  10.8     83   19.7 0.2373494  Group 2

Exercise 2

Su dung file du lieu kinhteluong

# Clear Workspace: drop every object that ls() lists
rm(list = ls())

# Attach stringr for the string helpers used in this exercise
library(stringr)

# Full paths of all entries in the kinhteluong data folder
# NOTE(review): the folder is spelled "DATA_SCIENCE" here but "DATA SCIENCE"
# in the listing printed under 2.b - confirm which spelling is correct.
data_path <- list.files(
  path = "D:\\DATA_SCIENCE\\R_COURSE_CASED\\kinhteluong",
  full.names = TRUE
)

2.a

Có bao nhiêu files dữ liệu có cụm kí tự .dta?

# Specify filter condition: TRUE for every path that contains ".dta".
# fixed() makes ".dta" a literal string; as a bare regex the "." would match
# any character, so names such as "mydta.csv" would also be counted.
condition <- str_detect(data_path, pattern = fixed(".dta"))

# Count -> 12 (summing a logical vector counts its TRUE values)
total_dta <- sum(condition)

total_dta
## [1] 12

2.b

Liệt kê đường dẫn đầy đủ của tất cả các files dữ liệu có cụm kí tự .dta

# Keep only the paths flagged by condition, i.e. the full paths of the
# files whose name contains ".dta"
dta_files <- data_path[which(condition)]

dta_files
##  [1] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/dung_stata13.dta"
##  [2] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/intdef.dta"      
##  [3] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/nghiyeu.dta"     
##  [4] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Panel1.dta"      
##  [5] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/probit.dta"      
##  [6] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/psi.dta"         
##  [7] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table1_1.dta"    
##  [8] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table13_1.dta"   
##  [9] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table4_0.dta"    
## [10] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table5_1.dta"    
## [11] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table6_1.dta"    
## [12] "D:\\DATA SCIENCE\\R_COURSE_CASED\\kinhteluong/Table8_1.dta"