# Install dplyr only when it is not already available, so re-running the
# script does not download the package again. The package name must be
# wrapped in plain ASCII double quotes; typographic (curly) quotes are not
# valid string delimiters in R and stop the script from parsing.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# Basic arithmetic operators. Each `## [1] ...` line records the value printed
# by the expression directly above it.
1 + 2 # Addition
## [1] 3
(2 + 2) * 3 # Parentheses are evaluated first
## [1] 12
4 / 3 # Division returns a real number
## [1] 1.333333
4 %/% 3 # Integer division (quotient only)
## [1] 1
4 %% 3 # Remainder of the division
## [1] 1
2^2 # Exponentiation
## [1] 4
2**2 # Exponentiation (alternative spelling)
## [1] 4
# Actual values
y_thuc <- c(3, 5, 7, 9)
# Predicted values
y_du_doan <- c(2.5, 5.2, 6.8, 9.5)
# Root mean squared error: square the residuals, average them, take the root.
# The residual vector is kept inside local() so no extra variable is created.
rmse <- local({
  residual <- y_thuc - y_du_doan
  sqrt(mean(residual^2))
})
rmse
## [1] 0.3807887
# Two scalar operands
a <- 8
b <- 10
# Their product, printed explicitly
s <- b * a
print(s)
## [1] 80
# Assignment forms and string literals.
x <- 2
2 * x
## [1] 4
# Right-hand assignment: stores 2 in x, the same effect as `x <- 2`.
2 -> x
# Wrapping an assignment in parentheses also prints the assigned value.
(y <- "visible")
## [1] "visible"
# Double-quoted and single-quoted literals both create character strings;
# the recorded output shows both printed with double quotes.
"hello,"
## [1] "hello,"
'world!'
## [1] "world!"
# print() displays a value explicitly.
print("hello my sweetie")
## [1] "hello my sweetie"
# Integer sequence 1..5
v <- seq_len(5)
# Squares of 1..4, built in a single step
x_vec <- seq_len(4)^2
# Sum of the squares
sum(x_vec)
## [1] 30
# length() counts the elements of a vector; nchar() counts the characters
# in a string.
x <- c("cho","meo","ga","vit")
length(x)
## [1] 4
nchar("di hoc") # 6 characters: the space is counted too
## [1] 6
# Vector indexing on v = 4, 5, ..., 20. Positions start at 1.
v <- 4:20
# First element
v[1]
## [1] 4
# First three elements
v[1:3]
## [1] 4 5 6
# Elements at positions 1, 3 and 5
v[c(1,3,5)]
## [1] 4 6 8
# Logical mask: keep only the even values
v[v %% 2 == 0]
## [1] 4 6 8 10 12 14 16 18 20
# Negative indices drop the listed positions (here the first three)
v[-(1:3)]
## [1] 7 8 9 10 11 12 13 14 15 16 17 18 19 20
# Named vector: elements can be looked up by name as well as by position.
v_named <- c("A"=1,"B"=2,"C"=3)
# Lookup by name; the printed result keeps the name above the value
v_named["A"]
## A
## 1
# Replace the names A, B, C with x, y, z (the values are unchanged)
names(v_named) <- c("x","y","z")
#' Square every element of a vector
#'
#' @param x Values to square (the example below passes the integers 1 to 4).
#' @return `x` raised to the power 2, element by element.
square <- function(x) {
  x^2
}
square(seq_len(4))
## [1] 1 4 9 16
#' Square `x`, then subtract `y`
#'
#' @param x Values to square.
#' @param y Values subtracted from the squares, element by element.
#' @return `x^2 - y`.
square_and_subtract <- function(x, y) x^2 - y
square_and_subtract(1:5, 5:1)
## [1] -4 0 6 14 24
#' Arithmetic mean of a vector
#'
#' @param x Numeric vector to average.
#' @param na.rm If `TRUE`, missing values are dropped before averaging.
#'   Defaults to `FALSE`, which keeps the original behaviour: any `NA` in
#'   `x` makes the result `NA`.
#' @return `sum(x) / length(x)` over the values that were kept; `NaN` when
#'   no values remain (0 / 0).
average <- function(x, na.rm = FALSE) {
  if (na.rm) {
    # Drop missing values so they affect neither the sum nor the count.
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
average(7:10)
## [1] 8.5
# if/else is an expression: it yields the value of the branch that runs.
# 2 > 3 is FALSE, so the else branch supplies the result.
if (2 > 3) "false" else "true"
## [1] "true"
# The condition is FALSE and there is no else branch, so nothing is printed.
if (3 < 2) "true"
# The value of an if/else can be assigned; x becomes "baz" here.
x <- if (2 > 3) "bar" else "baz"
# ifelse() is the vectorised form: it returns one result per element of the
# test vector.
x_vec <- c(1, 5, 2, 8)
ifelse(x_vec > 3, "Lon hon 3", "Nho hon hoac bang 3")
## [1] "Nho hon hoac bang 3" "Lon hon 3" "Nho hon hoac bang 3"
## [4] "Lon hon 3"
# Values to accumulate: the integers 1..5
x_loop <- seq_len(5)
# Running total, started at zero
total <- 0
# Visit each value in turn and fold it into the running total
for (element in x_loop) {
  total <- sum(total, element)
}
total
## [1] 15
# Missing values (NA).
# Arithmetic that involves NA yields NA.
NA + 5
## [1] NA
# is.na() tests whether a value is missing.
is.na(NA)
## [1] TRUE
is.na(4)
## [1] FALSE
# na.rm = TRUE drops the NA before summing, so only 1 and 2 are added.
sum(c(1, NA, 2), na.rm = TRUE)
## [1] 3
# Build a 5-row data frame with four integer columns: x, y, a and b.
df <- data.frame(x=1:5, y=6:10, a=11:15, b=16:20)
df
## x y a b
## 1 1 6 11 16
## 2 2 7 12 17
## 3 3 8 13 18
## 4 4 9 14 19
## 5 5 10 15 20
# Single cell: row 1, column 1
df[1,1]
## [1] 1
# Column "a" selected by name; the result prints as a plain vector
df[,"a"]
## [1] 11 12 13 14 15
# Column b through the $ operator
df$b
## [1] 16 17 18 19 20
library(magrittr)
# Starting value for the pipeline
x <- 10
# Two single-argument helpers to chain together
f <- function(k) {
  k + 2
}
g <- function(k) {
  k * 3
}
# Feed x through f, then pass the result to g: equivalent to g(f(x))
x %>% f() %>% g()
## [1] 36
# Fit a linear model through a pipe. The `.` placeholder marks where the piped
# data frame is inserted: the `data` argument rather than the first argument.
# NOTE(review): no seed is set before rnorm() in the visible part of the
# script, so the coefficients recorded below will differ from run to run.
d <- data.frame(x=rnorm(10), y=rnorm(10))
d %>% lm(y ~ x, data = .)
##
## Call:
## lm(formula = y ~ x, data = .)
##
## Coefficients:
## (Intercept) x
## 0.35434 -0.04214
# Factors represent categorical data as a fixed set of levels.
gender <- factor(c("male","female","female","male"))
gender
## [1] male female female male
## Levels: female male
# The distinct categories, in the order the factor stores them
levels(gender)
## [1] "female" "male"
# How many levels the factor has
nlevels(gender)
## [1] 2
# A list holds named elements of different types: here a string, a number,
# a numeric vector and a logical.
my_list <- list(
name = "Dat",
age = 25,
scores = c(8,9,10),
passed = TRUE
)
# Access an element by name
my_list$name
## [1] "Dat"
# [[ ]] extracts the element itself: the numeric vector stored third
my_list[[3]]
## [1] 8 9 10
# [ ] keeps the list wrapper: a one-element list still named `scores`
my_list[3]
## $scores
## [1] 8 9 10
# Matrices. matrix() fills the values column by column, as the printed M shows.
M <- matrix(1:6, nrow=2, ncol=3)
M
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
# Element at row 1, column 2
M[1,2]
## [1] 3
# Entire second row
M[2,]
## [1] 2 4 6
# Entire first column
M[,1]
## [1] 1 2
# Transpose, then matrix-multiply: (2 x 3) %*% (3 x 2) gives a 2 x 2 result.
M_transpose <- t(M)
# NOTE(review): `prod` is also the name of a base R function (product of
# values); a different variable name would avoid confusion.
prod <- M %*% M_transpose
prod
## [,1] [,2]
## [1,] 35 44
## [2,] 44 56
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fix the random seed so the 1000 normal draws are the same on every run
set.seed(123)
# Blank out the negative draws with NA, then average whatever is left
mean_positive <- mean(
  rnorm(1000) %>% replace(. < 0, NA),
  na.rm = TRUE
)
mean_positive
## [1] 0.7936708
# Two numeric columns, t and y, whose row-wise differences are measured
df_rmse <- data.frame(
  t = c(3, 5, 2.5, 7),
  y = c(2.5, 5, 4, 8)
)
# Collapse the table to one RMSE value in a single summarise() step
# (root of the mean squared difference), then pull it out as a plain number
rmse <- df_rmse %>%
  summarise(rmse = sqrt(mean((t - y)^2))) %>%
  pull(rmse)
rmse
## [1] 0.9354143
# Mixed-sign input values
x <- c(-3, 5, 0, 7, -2, 10)
# Keep only the strictly positive entries (zero is excluded)
x_duong <- x[which(x > 0)]
# Mean of the positive entries
mean(x_duong)
## [1] 7.333333
# Actual values followed by the predicted values
y_thuc <- c(3, 5, 7, 9)
y_du_doan <- c(2.5, 5.2, 6.8, 9.5)
# RMSE of the predictions; the outer parentheses make the assigned value
# visible so it is displayed like a bare `rmse` expression would be
(rmse <- sqrt(mean((y_thuc - y_du_doan)^2)))
## [1] 0.3807887