The essentials of R

Manipulation of vector

# Mixing numbers with the string "O" makes c() coerce every element to
# character; NA stays missing.
vec <- c(3, 5, 2, 1, 5, "O", NA)
# Count distinct values; NA counts as a value of its own.
length(unique(vec))
## [1] 6
# Convert back to numeric: "O" cannot be parsed and becomes NA (R emits a
# coercion warning), so the result holds two NAs.
num_vec <- as.numeric(vec)
log(num_vec)
## [1] 1.0986123 1.6094379 0.6931472 0.0000000 1.6094379        NA        NA
# na.rm = TRUE drops missing values before summing.
sum(c(num_vec, NA), na.rm = TRUE)
## [1] 16
# sort() discards NAs by default.
sort(num_vec, decreasing = TRUE)
## [1] 5 5 3 2 1
is.na(num_vec)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
# Keep only the non-missing elements.
num_vec[!is.na(num_vec)]
## [1] 3 5 2 1 5
# Membership test; vec is character, so 5 and 6 are compared as strings.
c(5, 6) %in% vec
## [1]  TRUE FALSE
# Flag the elements that contain the pattern "5".
grepl("5", vec)
## [1] FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE

Generate a sequence or a repeated sequence

# Evenly spaced values from 0 to 10 in steps of 0.5.
seq(0, 10, by = 0.5)
##  [1]  0.0  0.5  1.0  1.5  2.0  2.5  3.0  3.5  4.0  4.5  5.0  5.5  6.0  6.5  7.0
## [16]  7.5  8.0  8.5  9.0  9.5 10.0
# times = repeats the whole vector.
rep(1:3, times = 4)
##  [1] 1 2 3 1 2 3 1 2 3 1 2 3
# each = repeats every element in place.
rep(1:3, each = 4)
##  [1] 1 1 1 1 2 2 2 2 3 3 3 3

Get directory and write data out and in

# Report the working directory; the relative path below resolves against it.
getwd()
## [1] "C:/Users/hed2/Downloads/code-storage/code"
# Write the cars data to CSV without a row-name column, then read it back.
write.csv(cars, "cars.csv", row.names = FALSE)
dataframe  <- read.csv("cars.csv")

Function

# Add 7 to every element of x, then multiply the result by 4.
# x: a numeric vector. Returns a numeric vector of the same length;
# missing values stay missing.
my_func <- function(x) {
  4 * (7 + x)
}

# The arithmetic is vectorised, so the function applies to every element;
# NA inputs produce NA outputs.
my_func(num_vec)
## [1] 40 48 36 32 48 NA NA

Plot

# Scatter plot of dist against speed, using the formula interface.
plot(dist ~ speed, data=cars)

# Histogram of the dist column.
hist(cars$dist )

### Build model and plot

# Fit a simple linear regression of dist on speed.
model <- lm(dist ~ speed, data=cars)
# Redraw the scatter plot, then overlay the fitted regression line.
plot(dist ~ speed, data=cars)
abline(model)
# Reference lines: vertical at x = 25 and horizontal at y = 15.
abline(v = 25)
abline(h = 15)

### Rename names of columns

# Inspect the current column names.
names(cars)
## [1] "speed" "dist"
# Assigning to names(cars) creates a modified copy of the dataset in the
# workspace. The new names contain spaces, so they must be wrapped in
# backticks whenever they are referenced.
names(cars) <- c("speed per hour", "total dist")

Class of dataframe

# Convert the data frame to a matrix and back to a data frame.
# NOTE(review): the names `matrix` and `df` shadow base::matrix() and
# stats::df(); more specific names would be clearer.
matrix <- as.matrix(cars)
df <- as.data.frame(matrix)
class(matrix)
## [1] "matrix" "array"
class(df)
## [1] "data.frame"
# Transpose: rows become columns and columns become rows.
t(matrix)
##                [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## speed per hour    4    4    7    7    8    9   10   10   10    11    11    12
## total dist        2   10    4   22   16   10   18   26   34    17    28    14
##                [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22]
## speed per hour    12    12    12    13    13    13    13    14    14    14
## total dist        20    24    28    26    34    34    46    26    36    60
##                [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
## speed per hour    14    15    15    15    16    16    17    17    17    18
## total dist        80    20    26    54    32    40    32    40    50    42
##                [,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42]
## speed per hour    18    18    18    19    19    19    20    20    20    20
## total dist        56    76    84    36    46    68    32    48    52    56
##                [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
## speed per hour    20    22    23    24    24    24    24    25
## total dist        64    66    54    70    92    93   120    85

Generate new variable for dataframe (character)

# paste0() is vectorised: the prefix is recycled over the ten integers.
paste0("raster_", seq_len(10))
##  [1] "raster_1"  "raster_2"  "raster_3"  "raster_4"  "raster_5"  "raster_6" 
##  [7] "raster_7"  "raster_8"  "raster_9"  "raster_10"
# Cycle through the five labels ten times (50 values in total).
paste0("raster_", rep(seq_len(5), times = 10))
##  [1] "raster_1" "raster_2" "raster_3" "raster_4" "raster_5" "raster_1"
##  [7] "raster_2" "raster_3" "raster_4" "raster_5" "raster_1" "raster_2"
## [13] "raster_3" "raster_4" "raster_5" "raster_1" "raster_2" "raster_3"
## [19] "raster_4" "raster_5" "raster_1" "raster_2" "raster_3" "raster_4"
## [25] "raster_5" "raster_1" "raster_2" "raster_3" "raster_4" "raster_5"
## [31] "raster_1" "raster_2" "raster_3" "raster_4" "raster_5" "raster_1"
## [37] "raster_2" "raster_3" "raster_4" "raster_5" "raster_1" "raster_2"
## [43] "raster_3" "raster_4" "raster_5" "raster_1" "raster_2" "raster_3"
## [49] "raster_4" "raster_5"
# Add a cycling group label (raster_1 .. raster_5) and a unique id per row.
# The lengths come from nrow(df) instead of a hard-coded 50, so the code
# still works if the number of rows changes.
df$group <- paste0("raster_", rep_len(1:5, nrow(df)))
df$id <- paste0("raster_", seq_len(nrow(df)))

Create a new dataframe using ‘rnorm’ - random numbers drawn from a normal distribution

# Draw 50 values from a normal distribution (mean 0, sd 1), rounded to two
# decimals. No seed is set, so the values differ on every run.
sample <- round(rnorm(n = 50, mean = 0, sd = 1), digits = 2)
group <- paste0("raster_", rep(1:5, times = 10))

# Combine both vectors into a data frame and add a unique id per row.
df_join <- data.frame(sample = sample, group = group)
df_join[["id"]] <- paste0("raster_", seq_len(50))

Left join two dataframes

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep every row of df and attach the matching df_join columns by id.
# Both inputs also have a `group` column that is not part of the key, so the
# result carries group.x (from df) and group.y (from df_join).
data_all <- left_join(df, df_join, by="id")
head(data_all)
##   speed per hour total dist  group.x       id sample  group.y
## 1              4          2 raster_1 raster_1   0.93 raster_1
## 2              4         10 raster_2 raster_2   0.19 raster_2
## 3              7          4 raster_3 raster_3   0.02 raster_3
## 4              7         22 raster_4 raster_4  -0.23 raster_4
## 5              8         16 raster_5 raster_5  -0.48 raster_5
## 6              9         10 raster_1 raster_6   0.42 raster_1

Select variables

# Keep only the group.x and id columns.
select(data_all, group.x, id  )
##     group.x        id
## 1  raster_1  raster_1
## 2  raster_2  raster_2
## 3  raster_3  raster_3
## 4  raster_4  raster_4
## 5  raster_5  raster_5
## 6  raster_1  raster_6
## 7  raster_2  raster_7
## 8  raster_3  raster_8
## 9  raster_4  raster_9
## 10 raster_5 raster_10
## 11 raster_1 raster_11
## 12 raster_2 raster_12
## 13 raster_3 raster_13
## 14 raster_4 raster_14
## 15 raster_5 raster_15
## 16 raster_1 raster_16
## 17 raster_2 raster_17
## 18 raster_3 raster_18
## 19 raster_4 raster_19
## 20 raster_5 raster_20
## 21 raster_1 raster_21
## 22 raster_2 raster_22
## 23 raster_3 raster_23
## 24 raster_4 raster_24
## 25 raster_5 raster_25
## 26 raster_1 raster_26
## 27 raster_2 raster_27
## 28 raster_3 raster_28
## 29 raster_4 raster_29
## 30 raster_5 raster_30
## 31 raster_1 raster_31
## 32 raster_2 raster_32
## 33 raster_3 raster_33
## 34 raster_4 raster_34
## 35 raster_5 raster_35
## 36 raster_1 raster_36
## 37 raster_2 raster_37
## 38 raster_3 raster_38
## 39 raster_4 raster_39
## 40 raster_5 raster_40
## 41 raster_1 raster_41
## 42 raster_2 raster_42
## 43 raster_3 raster_43
## 44 raster_4 raster_44
## 45 raster_5 raster_45
## 46 raster_1 raster_46
## 47 raster_2 raster_47
## 48 raster_3 raster_48
## 49 raster_4 raster_49
## 50 raster_5 raster_50

Filter observations

# Keep only the rows whose group.x equals "raster_1".
raster_1 <- filter(data_all, group.x == "raster_1")
raster_1
##    speed per hour total dist  group.x        id sample  group.y
## 1               4          2 raster_1  raster_1   0.93 raster_1
## 2               9         10 raster_1  raster_6   0.42 raster_1
## 3              11         28 raster_1 raster_11  -0.11 raster_1
## 4              13         26 raster_1 raster_16  -0.91 raster_1
## 5              14         36 raster_1 raster_21  -0.54 raster_1
## 6              15         54 raster_1 raster_26  -1.08 raster_1
## 7              17         50 raster_1 raster_31  -0.24 raster_1
## 8              19         36 raster_1 raster_36  -2.27 raster_1
## 9              20         52 raster_1 raster_41  -0.34 raster_1
## 10             24         70 raster_1 raster_46  -0.77 raster_1
# Keep rows with speed below 11 and distance of at least 10.
# Columns are referenced by their back-quoted names rather than through
# data_all$..., so the conditions are evaluated inside the data frame being
# filtered; comma-separated conditions are combined with AND.
speed_dist <- filter(data_all, `speed per hour` < 11, `total dist` >= 10)
speed_dist
##   speed per hour total dist  group.x       id sample  group.y
## 1              4         10 raster_2 raster_2   0.19 raster_2
## 2              7         22 raster_4 raster_4  -0.23 raster_4
## 3              8         16 raster_5 raster_5  -0.48 raster_5
## 4              9         10 raster_1 raster_6   0.42 raster_1
## 5             10         18 raster_2 raster_7  -0.37 raster_2
## 6             10         26 raster_3 raster_8  -0.57 raster_3
## 7             10         34 raster_4 raster_9   0.54 raster_4

Append rows

# Stack the two filtered data frames; rows that satisfy both filters
# (e.g. raster_6) appear twice in the result.
rbind(raster_1,speed_dist)
##    speed per hour total dist  group.x        id sample  group.y
## 1               4          2 raster_1  raster_1   0.93 raster_1
## 2               9         10 raster_1  raster_6   0.42 raster_1
## 3              11         28 raster_1 raster_11  -0.11 raster_1
## 4              13         26 raster_1 raster_16  -0.91 raster_1
## 5              14         36 raster_1 raster_21  -0.54 raster_1
## 6              15         54 raster_1 raster_26  -1.08 raster_1
## 7              17         50 raster_1 raster_31  -0.24 raster_1
## 8              19         36 raster_1 raster_36  -2.27 raster_1
## 9              20         52 raster_1 raster_41  -0.34 raster_1
## 10             24         70 raster_1 raster_46  -0.77 raster_1
## 11              4         10 raster_2  raster_2   0.19 raster_2
## 12              7         22 raster_4  raster_4  -0.23 raster_4
## 13              8         16 raster_5  raster_5  -0.48 raster_5
## 14              9         10 raster_1  raster_6   0.42 raster_1
## 15             10         18 raster_2  raster_7  -0.37 raster_2
## 16             10         26 raster_3  raster_8  -0.57 raster_3
## 17             10         34 raster_4  raster_9   0.54 raster_4

Replace an existing variable with a modified version

# Overwrite the sample column with its values rounded to one decimal place.
# The result is only printed; data_all itself is not modified.
mutate(data_all, sample = round(sample, digits = 1))
##    speed per hour total dist  group.x        id sample  group.y
## 1               4          2 raster_1  raster_1    0.9 raster_1
## 2               4         10 raster_2  raster_2    0.2 raster_2
## 3               7          4 raster_3  raster_3    0.0 raster_3
## 4               7         22 raster_4  raster_4   -0.2 raster_4
## 5               8         16 raster_5  raster_5   -0.5 raster_5
## 6               9         10 raster_1  raster_6    0.4 raster_1
## 7              10         18 raster_2  raster_7   -0.4 raster_2
## 8              10         26 raster_3  raster_8   -0.6 raster_3
## 9              10         34 raster_4  raster_9    0.5 raster_4
## 10             11         17 raster_5 raster_10    0.9 raster_5
## 11             11         28 raster_1 raster_11   -0.1 raster_1
## 12             12         14 raster_2 raster_12   -0.4 raster_2
## 13             12         20 raster_3 raster_13   -0.3 raster_3
## 14             12         24 raster_4 raster_14   -1.2 raster_4
## 15             12         28 raster_5 raster_15    0.7 raster_5
## 16             13         26 raster_1 raster_16   -0.9 raster_1
## 17             13         34 raster_2 raster_17    1.4 raster_2
## 18             13         34 raster_3 raster_18    0.1 raster_3
## 19             13         46 raster_4 raster_19   -0.1 raster_4
## 20             14         26 raster_5 raster_20   -0.7 raster_5
## 21             14         36 raster_1 raster_21   -0.5 raster_1
## 22             14         60 raster_2 raster_22   -1.1 raster_2
## 23             14         80 raster_3 raster_23   -1.1 raster_3
## 24             15         20 raster_4 raster_24    0.0 raster_4
## 25             15         26 raster_5 raster_25   -0.2 raster_5
## 26             15         54 raster_1 raster_26   -1.1 raster_1
## 27             16         32 raster_2 raster_27   -1.0 raster_2
## 28             16         40 raster_3 raster_28    1.0 raster_3
## 29             17         32 raster_4 raster_29   -1.2 raster_4
## 30             17         40 raster_5 raster_30   -1.2 raster_5
## 31             17         50 raster_1 raster_31   -0.2 raster_1
## 32             18         42 raster_2 raster_32   -1.6 raster_2
## 33             18         56 raster_3 raster_33    0.8 raster_3
## 34             18         76 raster_4 raster_34    1.9 raster_4
## 35             18         84 raster_5 raster_35    0.0 raster_5
## 36             19         36 raster_1 raster_36   -2.3 raster_1
## 37             19         46 raster_2 raster_37   -0.3 raster_2
## 38             19         68 raster_3 raster_38    0.0 raster_3
## 39             20         32 raster_4 raster_39   -0.5 raster_4
## 40             20         48 raster_5 raster_40   -0.8 raster_5
## 41             20         52 raster_1 raster_41   -0.3 raster_1
## 42             20         56 raster_2 raster_42    0.0 raster_2
## 43             20         64 raster_3 raster_43   -0.8 raster_3
## 44             22         66 raster_4 raster_44    0.9 raster_4
## 45             23         54 raster_5 raster_45   -1.6 raster_5
## 46             24         70 raster_1 raster_46   -0.8 raster_1
## 47             24         92 raster_2 raster_47   -0.7 raster_2
## 48             24         93 raster_3 raster_48    0.0 raster_3
## 49             24        120 raster_4 raster_49    1.8 raster_4
## 50             25         85 raster_5 raster_50   -0.5 raster_5

Summarize statistics

# Summarise the whole table: the mean of `sample` and the largest `total dist`.
# A column name containing a space must be back-quoted; the double-quoted
# "total dist" is a string literal, so max() would just return that string.
# NOTE(review): mean_speed is computed from `sample`, not from a speed
# column - confirm the intended name.
summarize(data_all,
          mean_speed = mean(sample),
          max_dist = max(`total dist`))
##   mean_speed max_dist
## 1    -0.2302      120

Group dataframe then summarize statistics

# Group by group.x, then compute one summary row per group.
# `total dist` is back-quoted so max() receives the column; the double-quoted
# "total dist" is a string literal and would be returned unchanged.
# NOTE(review): mean_speed is computed from `sample`, not from a speed
# column - confirm the intended name.
data_all_group <- group_by(data_all, group.x)
summarize(data_all_group,
          mean_speed = mean(sample),
          max_dist = max(`total dist`))
## # A tibble: 5 × 3
##   group.x  mean_speed max_dist
##   <chr>         <dbl>    <dbl>
## 1 raster_1     -0.491       70
## 2 raster_2     -0.383       92
## 3 raster_3     -0.08        93
## 4 raster_4      0.193      120
## 5 raster_5     -0.39        85

Ungroup then summarize statistics

# Drop the grouping so the summary collapses to a single row again.
# `total dist` is back-quoted so max() receives the column; the double-quoted
# "total dist" is a string literal and would be returned unchanged.
# NOTE(review): mean_speed is computed from `sample`, not from a speed
# column - confirm the intended name.
ungroup_data <- ungroup(data_all_group)
summarize(ungroup_data,
          mean_speed = mean(sample),
          max_dist = max(`total dist`))
## # A tibble: 1 × 2
##   mean_speed max_dist
##        <dbl>    <dbl>
## 1     -0.230      120

Summary linear regression model

# Regress `total dist` on `speed per hour`.
# The columns are named in the formula and the data frame is passed via
# data =, instead of embedding cars$... vectors in the formula; this keeps
# the term names clean and lets predict() accept new data.
mod1 <- lm(`total dist` ~ `speed per hour`, data = cars)
summary(mod1)
## 
## Call:
## lm(formula = `total dist` ~ `speed per hour`, data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -17.5791     6.7584  -2.601   0.0123 *  
## `speed per hour`   3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12

Create frequency table

# Cross-tabulate speed (rows) against group (columns); each cell counts the
# observations with that combination.
table(data_all_group$`speed per hour`,data_all_group$group.x  )
##     
##      raster_1 raster_2 raster_3 raster_4 raster_5
##   4         1        1        0        0        0
##   7         0        0        1        1        0
##   8         0        0        0        0        1
##   9         1        0        0        0        0
##   10        0        1        1        1        0
##   11        1        0        0        0        1
##   12        0        1        1        1        1
##   13        1        1        1        1        0
##   14        1        1        1        0        1
##   15        1        0        0        1        1
##   16        0        1        1        0        0
##   17        1        0        0        1        1
##   18        0        1        1        1        1
##   19        1        1        1        0        0
##   20        1        1        1        1        1
##   22        0        0        0        1        0
##   23        0        0        0        0        1
##   24        1        1        1        1        0
##   25        0        0        0        0        1