# Mixed-type vector. The string "O" (capital letter O, not the digit zero)
# forces c() to coerce every element to character; NA stays missing.
vec <- c(3, 5, 2, 1, 5, "O", NA)
# Number of distinct values; NA counts as one of them.
length(unique(vec))
## [1] 6
# Coerce back to numeric. "O" cannot be parsed as a number, so it becomes NA
# and R emits an "NAs introduced by coercion" warning.
num_vec <- as.numeric(vec)
log(num_vec)
## [1] 1.0986123 1.6094379 0.6931472 0.0000000 1.6094379 NA NA
# na.rm = TRUE drops every NA before summing. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
sum(c(num_vec, NA), na.rm = TRUE)
## [1] 16
# sort() removes NA values by default, hence only five values are returned.
sort(num_vec, decreasing = TRUE)
## [1] 5 5 3 2 1
# Logical mask of the missing entries ...
is.na(num_vec)
## [1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE
# ... and its negation used to keep only the non-missing values.
num_vec[!is.na(num_vec)]
## [1] 3 5 2 1 5
# Membership test against the character vector: 5 matches "5", 6 is absent.
c(5, 6) %in% vec
## [1] TRUE FALSE
# Pattern match on each element; the NA element yields FALSE here.
grepl("5", vec)
## [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE
# Evenly spaced grid from 0 to 10 in steps of 0.5 (21 values).
seq(0, 10, by = 0.5)
## [1] 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0
## [16] 7.5 8.0 8.5 9.0 9.5 10.0
# times = 4 repeats the whole vector four times in a row.
rep(1:3, times = 4)
## [1] 1 2 3 1 2 3 1 2 3 1 2 3
# each = 4 repeats every element four times before moving to the next one.
rep(1:3, each = 4)
## [1] 1 1 1 1 2 2 2 2 3 3 3 3
# Show the current working directory; the relative file name below is
# resolved against it. (The former setwd(getwd()) call was removed: it set
# the directory to the value it already had.)
getwd()
## [1] "C:/Users/hed2/Downloads/code-storage/code"
# Write `cars` to a CSV file and read it back into `dataframe`.
# row.names = FALSE keeps the row index out of the file so that read.csv()
# returns only the data columns.
write.csv(cars, "cars.csv", row.names = FALSE)
dataframe <- read.csv("cars.csv")
# Shift a numeric vector by 7, then scale the result by 4.
#
# x: numeric vector; NA values propagate to the result.
# Returns a vector of the same length as `x` holding (x + 7) * 4.
my_func <- function(x) {
  shifted <- x + 7
  shifted * 4
}
# Apply my_func() to the coerced vector; the two NA inputs stay NA.
my_func(x = num_vec)
## [1] 40 48 36 32 48 NA NA
# Quick look at the cars data: scatter plot of dist against speed, followed
# by a histogram of the dist column.
plot(dist ~ speed, data = cars)
hist(cars$dist)
### Fit a linear model and draw it on the scatter plot
model <- lm(formula = dist ~ speed, data = cars)
plot(dist ~ speed, data = cars)
# Fitted regression line, taken from the model's intercept and slope.
abline(model)
# Reference lines: vertical at speed = 25 and horizontal at dist = 15.
abline(v = 25, h = 15)
### Rename the columns
colnames(cars)
## [1] "speed" "dist"
# Assigning new names creates a modified copy of `cars` in the workspace.
# The new names contain spaces, so code that refers to these columns by
# name has to quote them with backticks.
colnames(cars) <- c("speed per hour", "total dist")
# Convert the data frame to a matrix and inspect its class ...
matrix <- as.matrix(cars)
class(matrix)
## [1] "matrix" "array"
# ... then convert the matrix back to a data frame.
df <- as.data.frame(matrix)
class(df)
## [1] "data.frame"
# Transpose: rows become columns (one row per variable afterwards).
t(matrix)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## speed per hour 4 4 7 7 8 9 10 10 10 11 11 12
## total dist 2 10 4 22 16 10 18 26 34 17 28 14
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22]
## speed per hour 12 12 12 13 13 13 13 14 14 14
## total dist 20 24 28 26 34 34 46 26 36 60
## [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
## speed per hour 14 15 15 15 16 16 17 17 17 18
## total dist 80 20 26 54 32 40 32 40 50 42
## [,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42]
## speed per hour 18 18 18 19 19 19 20 20 20 20
## total dist 56 76 84 36 46 68 32 48 52 56
## [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## speed per hour 20 22 23 24 24 24 24 25
## total dist 64 66 54 70 92 93 120 85
# paste0() is vectorised: the prefix is recycled against every number.
paste0("raster_", seq_len(10))
## [1] "raster_1" "raster_2" "raster_3" "raster_4" "raster_5" "raster_6"
## [7] "raster_7" "raster_8" "raster_9" "raster_10"
# Cycle through the five labels raster_1 ... raster_5 ten times (50 labels).
rep(paste0("raster_", 1:5), times = 10)
## [1] "raster_1" "raster_2" "raster_3" "raster_4" "raster_5" "raster_1"
## [7] "raster_2" "raster_3" "raster_4" "raster_5" "raster_1" "raster_2"
## [13] "raster_3" "raster_4" "raster_5" "raster_1" "raster_2" "raster_3"
## [19] "raster_4" "raster_5" "raster_1" "raster_2" "raster_3" "raster_4"
## [25] "raster_5" "raster_1" "raster_2" "raster_3" "raster_4" "raster_5"
## [31] "raster_1" "raster_2" "raster_3" "raster_4" "raster_5" "raster_1"
## [37] "raster_2" "raster_3" "raster_4" "raster_5" "raster_1" "raster_2"
## [43] "raster_3" "raster_4" "raster_5" "raster_1" "raster_2" "raster_3"
## [49] "raster_4" "raster_5"
# Tag every row of df with a cycling group label (raster_1 ... raster_5)
# and a unique id (raster_1 ... raster_50).
df[["group"]] <- rep(paste0("raster_", 1:5), times = 10)
df[["id"]] <- paste0("raster_", seq_len(50))
# Second table to join against: 50 standard-normal draws rounded to two
# decimals, with the same cycling group labels and ids as above.
# NOTE(review): no set.seed() call is visible before this draw, so the
# values (and any printed output derived from them) differ between runs.
sample <- round(rnorm(n = 50, mean = 0, sd = 1), digits = 2)
group <- rep(paste0("raster_", 1:5), times = 10)
# Column names are taken from the variable names: `sample` and `group`.
df_join <- data.frame(sample, group)
df_join[["id"]] <- paste0("raster_", seq_len(50))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Left join: keep every row of df and attach the matching df_join row by id.
# Both tables carry a `group` column that is not part of the join key, so
# the result holds both copies as group.x (from df) and group.y (from
# df_join).
data_all <- df %>%
  left_join(df_join, by = "id")
head(data_all, n = 6)
## speed per hour total dist group.x id sample group.y
## 1 4 2 raster_1 raster_1 0.93 raster_1
## 2 4 10 raster_2 raster_2 0.19 raster_2
## 3 7 4 raster_3 raster_3 0.02 raster_3
## 4 7 22 raster_4 raster_4 -0.23 raster_4
## 5 8 16 raster_5 raster_5 -0.48 raster_5
## 6 9 10 raster_1 raster_6 0.42 raster_1
# Keep only the group label (from df) and the id column; result is printed,
# not stored.
data_all %>%
  select(group.x, id)
## group.x id
## 1 raster_1 raster_1
## 2 raster_2 raster_2
## 3 raster_3 raster_3
## 4 raster_4 raster_4
## 5 raster_5 raster_5
## 6 raster_1 raster_6
## 7 raster_2 raster_7
## 8 raster_3 raster_8
## 9 raster_4 raster_9
## 10 raster_5 raster_10
## 11 raster_1 raster_11
## 12 raster_2 raster_12
## 13 raster_3 raster_13
## 14 raster_4 raster_14
## 15 raster_5 raster_15
## 16 raster_1 raster_16
## 17 raster_2 raster_17
## 18 raster_3 raster_18
## 19 raster_4 raster_19
## 20 raster_5 raster_20
## 21 raster_1 raster_21
## 22 raster_2 raster_22
## 23 raster_3 raster_23
## 24 raster_4 raster_24
## 25 raster_5 raster_25
## 26 raster_1 raster_26
## 27 raster_2 raster_27
## 28 raster_3 raster_28
## 29 raster_4 raster_29
## 30 raster_5 raster_30
## 31 raster_1 raster_31
## 32 raster_2 raster_32
## 33 raster_3 raster_33
## 34 raster_4 raster_34
## 35 raster_5 raster_35
## 36 raster_1 raster_36
## 37 raster_2 raster_37
## 38 raster_3 raster_38
## 39 raster_4 raster_39
## 40 raster_5 raster_40
## 41 raster_1 raster_41
## 42 raster_2 raster_42
## 43 raster_3 raster_43
## 44 raster_4 raster_44
## 45 raster_5 raster_45
## 46 raster_1 raster_46
## 47 raster_2 raster_47
## 48 raster_3 raster_48
## 49 raster_4 raster_49
## 50 raster_5 raster_50
# Rows whose group label (taken from df) is exactly "raster_1".
raster_1 <- data_all %>%
  filter(group.x == "raster_1")
raster_1
## speed per hour total dist group.x id sample group.y
## 1 4 2 raster_1 raster_1 0.93 raster_1
## 2 9 10 raster_1 raster_6 0.42 raster_1
## 3 11 28 raster_1 raster_11 -0.11 raster_1
## 4 13 26 raster_1 raster_16 -0.91 raster_1
## 5 14 36 raster_1 raster_21 -0.54 raster_1
## 6 15 54 raster_1 raster_26 -1.08 raster_1
## 7 17 50 raster_1 raster_31 -0.24 raster_1
## 8 19 36 raster_1 raster_36 -2.27 raster_1
## 9 20 52 raster_1 raster_41 -0.34 raster_1
## 10 24 70 raster_1 raster_46 -0.77 raster_1
# Rows with speed below 11 and distance of at least 10.
# Columns are referenced through data masking (backticks because the names
# contain spaces) rather than via data_all$...: the `$` form reads the
# columns from the global object instead of the data passed to filter(), so
# it would give wrong results if the input were grouped or piped from
# another step. Comma-separated conditions are combined with a logical AND.
speed_dist <- filter(
  data_all,
  `speed per hour` < 11,
  `total dist` >= 10
)
speed_dist
## speed per hour total dist group.x id sample group.y
## 1 4 10 raster_2 raster_2 0.19 raster_2
## 2 7 22 raster_4 raster_4 -0.23 raster_4
## 3 8 16 raster_5 raster_5 -0.48 raster_5
## 4 9 10 raster_1 raster_6 0.42 raster_1
## 5 10 18 raster_2 raster_7 -0.37 raster_2
## 6 10 26 raster_3 raster_8 -0.57 raster_3
## 7 10 34 raster_4 raster_9 0.54 raster_4
# Stack the two subsets row-wise. Duplicates are not removed: a row that
# satisfies both filters (e.g. id raster_6) appears twice in the result.
rbind(
  raster_1,
  speed_dist
)
## speed per hour total dist group.x id sample group.y
## 1 4 2 raster_1 raster_1 0.93 raster_1
## 2 9 10 raster_1 raster_6 0.42 raster_1
## 3 11 28 raster_1 raster_11 -0.11 raster_1
## 4 13 26 raster_1 raster_16 -0.91 raster_1
## 5 14 36 raster_1 raster_21 -0.54 raster_1
## 6 15 54 raster_1 raster_26 -1.08 raster_1
## 7 17 50 raster_1 raster_31 -0.24 raster_1
## 8 19 36 raster_1 raster_36 -2.27 raster_1
## 9 20 52 raster_1 raster_41 -0.34 raster_1
## 10 24 70 raster_1 raster_46 -0.77 raster_1
## 11 4 10 raster_2 raster_2 0.19 raster_2
## 12 7 22 raster_4 raster_4 -0.23 raster_4
## 13 8 16 raster_5 raster_5 -0.48 raster_5
## 14 9 10 raster_1 raster_6 0.42 raster_1
## 15 10 18 raster_2 raster_7 -0.37 raster_2
## 16 10 26 raster_3 raster_8 -0.57 raster_3
## 17 10 34 raster_4 raster_9 0.54 raster_4
# Overwrite `sample` with its value rounded to one decimal place. The result
# is only printed; data_all itself is not modified.
data_all %>%
  mutate(sample = round(sample, digits = 1))
## speed per hour total dist group.x id sample group.y
## 1 4 2 raster_1 raster_1 0.9 raster_1
## 2 4 10 raster_2 raster_2 0.2 raster_2
## 3 7 4 raster_3 raster_3 0.0 raster_3
## 4 7 22 raster_4 raster_4 -0.2 raster_4
## 5 8 16 raster_5 raster_5 -0.5 raster_5
## 6 9 10 raster_1 raster_6 0.4 raster_1
## 7 10 18 raster_2 raster_7 -0.4 raster_2
## 8 10 26 raster_3 raster_8 -0.6 raster_3
## 9 10 34 raster_4 raster_9 0.5 raster_4
## 10 11 17 raster_5 raster_10 0.9 raster_5
## 11 11 28 raster_1 raster_11 -0.1 raster_1
## 12 12 14 raster_2 raster_12 -0.4 raster_2
## 13 12 20 raster_3 raster_13 -0.3 raster_3
## 14 12 24 raster_4 raster_14 -1.2 raster_4
## 15 12 28 raster_5 raster_15 0.7 raster_5
## 16 13 26 raster_1 raster_16 -0.9 raster_1
## 17 13 34 raster_2 raster_17 1.4 raster_2
## 18 13 34 raster_3 raster_18 0.1 raster_3
## 19 13 46 raster_4 raster_19 -0.1 raster_4
## 20 14 26 raster_5 raster_20 -0.7 raster_5
## 21 14 36 raster_1 raster_21 -0.5 raster_1
## 22 14 60 raster_2 raster_22 -1.1 raster_2
## 23 14 80 raster_3 raster_23 -1.1 raster_3
## 24 15 20 raster_4 raster_24 0.0 raster_4
## 25 15 26 raster_5 raster_25 -0.2 raster_5
## 26 15 54 raster_1 raster_26 -1.1 raster_1
## 27 16 32 raster_2 raster_27 -1.0 raster_2
## 28 16 40 raster_3 raster_28 1.0 raster_3
## 29 17 32 raster_4 raster_29 -1.2 raster_4
## 30 17 40 raster_5 raster_30 -1.2 raster_5
## 31 17 50 raster_1 raster_31 -0.2 raster_1
## 32 18 42 raster_2 raster_32 -1.6 raster_2
## 33 18 56 raster_3 raster_33 0.8 raster_3
## 34 18 76 raster_4 raster_34 1.9 raster_4
## 35 18 84 raster_5 raster_35 0.0 raster_5
## 36 19 36 raster_1 raster_36 -2.3 raster_1
## 37 19 46 raster_2 raster_37 -0.3 raster_2
## 38 19 68 raster_3 raster_38 0.0 raster_3
## 39 20 32 raster_4 raster_39 -0.5 raster_4
## 40 20 48 raster_5 raster_40 -0.8 raster_5
## 41 20 52 raster_1 raster_41 -0.3 raster_1
## 42 20 56 raster_2 raster_42 0.0 raster_2
## 43 20 64 raster_3 raster_43 -0.8 raster_3
## 44 22 66 raster_4 raster_44 0.9 raster_4
## 45 23 54 raster_5 raster_45 -1.6 raster_5
## 46 24 70 raster_1 raster_46 -0.8 raster_1
## 47 24 92 raster_2 raster_47 -0.7 raster_2
## 48 24 93 raster_3 raster_48 0.0 raster_3
## 49 24 120 raster_4 raster_49 1.8 raster_4
## 50 25 85 raster_5 raster_50 -0.5 raster_5
# Whole-table summary.
# Column names that contain spaces must be wrapped in backticks. In double
# quotes, "total dist" is just a string, and max() of a string returns that
# string instead of the largest distance.
# NOTE(review): mean_speed is computed from `sample`, not from a speed
# column -- confirm that this is the intended input.
summarize(data_all,
  mean_speed = mean(sample),
  max_dist = max(`total dist`)
)
## mean_speed max_dist
## 1 -0.2302 120
# Same summary, computed once per group label.
data_all_group <- group_by(data_all, group.x)
# `total dist` is quoted with backticks so that max() sees the column; the
# double-quoted form "total dist" is a string literal and would be returned
# unchanged for every group.
# NOTE(review): mean_speed is computed from `sample`, not from a speed
# column -- confirm that this is the intended input.
summarize(data_all_group,
  mean_speed = mean(sample),
  max_dist = max(`total dist`)
)
## # A tibble: 5 × 3
## group.x mean_speed max_dist
## <chr> <dbl> <dbl>
## 1 raster_1 -0.491 70
## 2 raster_2 -0.383 92
## 3 raster_3 -0.08 93
## 4 raster_4 0.193 120
## 5 raster_5 -0.39 85
# Drop the grouping again; the summary then collapses to a single row.
ungroup_data <- ungroup(data_all_group)
# `total dist` is quoted with backticks so that max() sees the column; the
# double-quoted form "total dist" is a string literal and would be returned
# unchanged.
# NOTE(review): mean_speed is computed from `sample`, not from a speed
# column -- confirm that this is the intended input.
summarize(ungroup_data,
  mean_speed = mean(sample),
  max_dist = max(`total dist`)
)
## # A tibble: 1 × 2
## mean_speed max_dist
## <dbl> <dbl>
## 1 -0.230 120
# Regress distance on speed using the renamed columns of `cars`.
# The variables are named in the formula (backticks because the names
# contain spaces) and looked up through `data =`. Writing cars$... inside
# the formula ties the model to that specific object: the term is labelled
# with the whole cars$... expression, and predict() cannot substitute new
# data for it.
mod1 <- lm(`total dist` ~ `speed per hour`, data = cars)
summary(mod1)
##
## Call:
## lm(formula = `total dist` ~ `speed per hour`, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## `speed per hour` 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
# Cross-tabulate speed (rows) against group label (columns); each cell is
# the number of rows with that combination.
table(
  data_all_group[["speed per hour"]],
  data_all_group[["group.x"]]
)
##
## raster_1 raster_2 raster_3 raster_4 raster_5
## 4 1 1 0 0 0
## 7 0 0 1 1 0
## 8 0 0 0 0 1
## 9 1 0 0 0 0
## 10 0 1 1 1 0
## 11 1 0 0 0 1
## 12 0 1 1 1 1
## 13 1 1 1 1 0
## 14 1 1 1 0 1
## 15 1 0 0 1 1
## 16 0 1 1 0 0
## 17 1 0 0 1 1
## 18 0 1 1 1 1
## 19 1 1 1 0 0
## 20 1 1 1 1 1
## 22 0 0 0 1 0
## 23 0 0 0 0 1
## 24 1 1 1 1 0
## 25 0 0 0 0 1