This is an evolving personal reference scrapbook of R commands and examples.
Some of it might be borrowed, or nicked from others, but done with love.
mtcars %>%
tibble::rownames_to_column(var="car_name") %>%
select(car_name, mpg, disp, gear) %>%
filter(mpg > 15, ( disp > 150 | gear != 4))
## car_name mpg disp gear
## 1 Mazda RX4 21.0 160.0 4
## 2 Mazda RX4 Wag 21.0 160.0 4
## 3 Hornet 4 Drive 21.4 258.0 3
## 4 Hornet Sportabout 18.7 360.0 3
## 5 Valiant 18.1 225.0 3
## 6 Merc 280 19.2 167.6 4
## 7 Merc 280C 17.8 167.6 4
## 8 Merc 450SE 16.4 275.8 3
## 9 Merc 450SL 17.3 275.8 3
## 10 Merc 450SLC 15.2 275.8 3
## 11 Toyota Corona 21.5 120.1 3
## 12 Dodge Challenger 15.5 318.0 3
## 13 AMC Javelin 15.2 304.0 3
## 14 Pontiac Firebird 19.2 400.0 3
## 15 Porsche 914-2 26.0 120.3 5
## 16 Lotus Europa 30.4 95.1 5
## 17 Ford Pantera L 15.8 351.0 5
## 18 Ferrari Dino 19.7 145.0 5
If you wrap a statement in parentheses, R prints its result as well as evaluating it. This is handy for showing the value of an assignment without a second line of code, at the small risk of confusing newcomers to R.
(5 + 2)
## [1] 7
(x <- -1:12)
## [1] -1 0 1 2 3 4 5 6 7 8 9 10 11 12
x %/% 5
## [1] -1 0 0 0 0 0 1 1 1 1 1 2 2 2
(x <- -1:12)
## [1] -1 0 1 2 3 4 5 6 7 8 9 10 11 12
x %% 5
## [1] 4 0 1 2 3 4 0 1 2 3 4 0 1 2
This is the exposition operator (`%$%`) from the magrittr package. It exposes the names within the left-hand side object to the right-hand side expression. So you can write this:
mtcars %$%
hist(mpg)
nycflights13::airlines
## # A tibble: 16 × 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AA American Airlines Inc.
## 3 AS Alaska Airlines Inc.
## 4 B6 JetBlue Airways
## 5 DL Delta Air Lines Inc.
## 6 EV ExpressJet Airlines Inc.
## 7 F9 Frontier Airlines Inc.
## 8 FL AirTran Airways Corporation
## 9 HA Hawaiian Airlines Inc.
## 10 MQ Envoy Air
## 11 OO SkyWest Airlines Inc.
## 12 UA United Air Lines Inc.
## 13 US US Airways Inc.
## 14 VX Virgin America
## 15 WN Southwest Airlines Co.
## 16 YV Mesa Airlines Inc.
nycflights13::airlines %>%
select(carrier, name) %>%
filter(carrier %in% c("AA", "B6", "DL"))
## # A tibble: 3 × 2
## carrier name
## <chr> <chr>
## 1 AA American Airlines Inc.
## 2 B6 JetBlue Airways
## 3 DL Delta Air Lines Inc.
drop <- c("AA", "B6", "DL")
nycflights13::airlines %>%
select(carrier, name) %>%
filter(!carrier %in% drop)
## # A tibble: 13 × 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AS Alaska Airlines Inc.
## 3 EV ExpressJet Airlines Inc.
## 4 F9 Frontier Airlines Inc.
## 5 FL AirTran Airways Corporation
## 6 HA Hawaiian Airlines Inc.
## 7 MQ Envoy Air
## 8 OO SkyWest Airlines Inc.
## 9 UA United Air Lines Inc.
## 10 US US Airways Inc.
## 11 VX Virgin America
## 12 WN Southwest Airlines Co.
## 13 YV Mesa Airlines Inc.
# get the mean mpg by cylinder
aggregate(data = mtcars, mpg ~ cyl, mean, na.rm = TRUE)
## cyl mpg
## 1 4 26.66364
## 2 6 19.74286
## 3 8 15.10000
# variant: non-formula interface; TRUE is spelled out because T is an ordinary, reassignable variable
aggregate(mtcars$mpg, list(Cylinder = mtcars$cyl), mean, na.rm = TRUE)
## Cylinder x
## 1 4 26.66364
## 2 6 19.74286
## 3 8 15.10000
aggregate(mpg ~ cyl, mtcars, mean) # variant
## cyl mpg
## 1 4 26.66364
## 2 6 19.74286
## 3 8 15.10000
aggregate(cbind(mpg, disp) ~ cyl, mtcars, mean)
## cyl mpg disp
## 1 4 26.66364 105.1364
## 2 6 19.74286 183.3143
## 3 8 15.10000 353.1000
# get the mean of all variables, grouped by another variable
aggregate(data = mtcars, . ~ cyl, mean, na.rm = TRUE)
## cyl mpg disp hp drat wt qsec vs
## 1 4 26.66364 105.1364 82.63636 4.070909 2.285727 19.13727 0.9090909
## 2 6 19.74286 183.3143 122.28571 3.585714 3.117143 17.97714 0.5714286
## 3 8 15.10000 353.1000 209.21429 3.229286 3.999214 16.77214 0.0000000
## am gear carb
## 1 0.7272727 4.090909 1.545455
## 2 0.4285714 3.857143 3.428571
## 3 0.1428571 3.285714 3.500000
# get the maximum of all variables, grouped by another variable
aggregate(data = mtcars, . ~ cyl, max, na.rm = TRUE)
## cyl mpg disp hp drat wt qsec vs am gear carb
## 1 4 33.9 146.7 113 4.93 3.190 22.90 1 1 5 2
## 2 6 21.4 258.0 175 3.92 3.460 20.22 1 1 5 6
## 3 8 19.2 472.0 335 4.22 5.424 18.00 0 1 5 8
# standard deviation
# TRUE is spelled out because T is an ordinary, reassignable variable
aggregate(mtcars$mpg, list(Cylinder = mtcars$cyl), sd, na.rm = TRUE)
## Cylinder x
## 1 4 4.509828
## 2 6 1.453567
## 3 8 2.560048
aggregate(. ~ cyl + gear, mtcars, mean)
## cyl gear mpg disp hp drat wt qsec vs am carb
## 1 4 3 21.500 120.1000 97.0000 3.700000 2.465000 20.0100 1.0 0.00 1.000000
## 2 6 3 19.750 241.5000 107.5000 2.920000 3.337500 19.8300 1.0 0.00 1.000000
## 3 8 3 15.050 357.6167 194.1667 3.120833 4.104083 17.1425 0.0 0.00 3.083333
## 4 4 4 26.925 102.6250 76.0000 4.110000 2.378125 19.6125 1.0 0.75 1.500000
## 5 6 4 19.750 163.8000 116.5000 3.910000 3.093750 17.6700 0.5 0.50 4.000000
## 6 4 5 28.200 107.7000 102.0000 4.100000 1.826500 16.8000 0.5 1.00 2.000000
## 7 6 5 19.700 145.0000 175.0000 3.620000 2.770000 15.5000 0.0 1.00 6.000000
## 8 8 5 15.400 326.0000 299.5000 3.880000 3.370000 14.5500 0.0 1.00 6.000000
# calculate the mean mpg by all the other variables
# this doesn't make much sense for the mtcars dataset
# but it might for a different dataset
aggregate(mpg ~ ., mtcars, mean)
## cyl disp hp drat wt qsec vs am gear carb mpg
## 1 6 258.0 110 3.08 3.215 19.44 1 0 3 1 21.4
## 2 4 120.1 97 3.70 2.465 20.01 1 0 3 1 21.5
## 3 6 225.0 105 2.76 3.460 20.22 1 0 3 1 18.1
## 4 4 108.0 93 3.85 2.320 18.61 1 1 4 1 22.8
## 5 4 79.0 66 4.08 1.935 18.90 1 1 4 1 27.3
## 6 4 78.7 66 4.08 2.200 19.47 1 1 4 1 32.4
## 7 4 71.1 65 4.22 1.835 19.90 1 1 4 1 33.9
## 8 8 318.0 150 2.76 3.520 16.87 0 0 3 2 15.5
## 9 8 360.0 175 3.15 3.440 17.02 0 0 3 2 18.7
## 10 8 400.0 175 3.08 3.845 17.05 0 0 3 2 19.2
## 11 8 304.0 150 3.15 3.435 17.30 0 0 3 2 15.2
## 12 4 146.7 62 3.69 3.190 20.00 1 0 4 2 24.4
## 13 4 140.8 95 3.92 3.150 22.90 1 0 4 2 22.8
## 14 4 75.7 52 4.93 1.615 18.52 1 1 4 2 30.4
## 15 4 121.0 109 4.11 2.780 18.60 1 1 4 2 21.4
## 16 4 120.3 91 4.43 2.140 16.70 0 1 5 2 26.0
## 17 4 95.1 113 3.77 1.513 16.90 1 1 5 2 30.4
## 18 8 275.8 180 3.07 4.070 17.40 0 0 3 3 16.4
## 19 8 275.8 180 3.07 3.730 17.60 0 0 3 3 17.3
## 20 8 275.8 180 3.07 3.780 18.00 0 0 3 3 15.2
## 21 8 350.0 245 3.73 3.840 15.41 0 0 3 4 13.3
## 22 8 360.0 245 3.21 3.570 15.84 0 0 3 4 14.3
## 23 8 440.0 230 3.23 5.345 17.42 0 0 3 4 14.7
## 24 8 460.0 215 3.00 5.424 17.82 0 0 3 4 10.4
## 25 8 472.0 205 2.93 5.250 17.98 0 0 3 4 10.4
## 26 6 167.6 123 3.92 3.440 18.30 1 0 4 4 19.2
## 27 6 167.6 123 3.92 3.440 18.90 1 0 4 4 17.8
## 28 6 160.0 110 3.90 2.620 16.46 0 1 4 4 21.0
## 29 6 160.0 110 3.90 2.875 17.02 0 1 4 4 21.0
## 30 8 351.0 264 4.22 3.170 14.50 0 1 5 4 15.8
## 31 6 145.0 175 3.62 2.770 15.50 0 1 5 6 19.7
## 32 8 301.0 335 3.54 3.570 14.60 0 1 5 8 15.0
range(x <- sort(round(stats::rnorm(10) - 1.2, 1)))
## [1] -2.0 0.8
if(all(x < 0)) cat("all x values are negative\n")
V1 <- c(TRUE, TRUE, TRUE)
V2 <- c(TRUE, TRUE, FALSE)
V3 <- c(FALSE, FALSE, FALSE)
all(V1)
## [1] TRUE
all(V2)
## [1] FALSE
all(V3)
## [1] FALSE
Given a set of logical vectors, is at least one (any) of the values true?
range(x <- sort(round(stats::rnorm(10) - 1.2, 1)))
## [1] -2.6 0.4
if(any(x < 0)) cat("x contains negative values\n")
## x contains negative values
V1 <- c(TRUE, TRUE, TRUE)
V2 <- c(TRUE, TRUE, FALSE)
V3 <- c(FALSE, FALSE, FALSE)
any(V1)
## [1] TRUE
any(V2)
## [1] TRUE
any(V3)
## [1] FALSE
head(cars)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
apply(cars, 1, mean) # 1 = rows
## [1] 3.0 7.0 5.5 14.5 12.0 9.5 14.0 18.0 22.0 14.0 19.5 13.0 16.0 18.0 20.0
## [16] 19.5 23.5 23.5 29.5 20.0 25.0 37.0 47.0 17.5 20.5 34.5 24.0 28.0 24.5 28.5
## [31] 33.5 30.0 37.0 47.0 51.0 27.5 32.5 43.5 26.0 34.0 36.0 38.0 42.0 44.0 38.5
## [46] 47.0 58.0 58.5 72.0 55.0
apply(cars, 2, mean) # 2 = columns
## speed dist
## 15.40 42.98
apply(cars, 2, max) # 2 = columns
## speed dist
## 25 120
# Names of all objects on the search path matching the regular expression "read.csv".
# Note the "." is a regex wildcard here, so it matches any single character.
apropos("read.csv")
mtcars %>%
tibble::rownames_to_column(var="car_name") %>%
select(car_name, cyl, disp) %>%
filter(between(disp, 100, 200))
## car_name cyl disp
## 1 Mazda RX4 6 160.0
## 2 Mazda RX4 Wag 6 160.0
## 3 Datsun 710 4 108.0
## 4 Merc 240D 4 146.7
## 5 Merc 230 4 140.8
## 6 Merc 280 6 167.6
## 7 Merc 280C 6 167.6
## 8 Toyota Corona 4 120.1
## 9 Porsche 914-2 4 120.3
## 10 Ferrari Dino 6 145.0
## 11 Volvo 142E 4 121.0
c(1,7:9)
## [1] 1 7 8 9
c(1:5, 10.5, "next")
## [1] "1" "2" "3" "4" "5" "10.5" "next"
## uses with a single argument to drop attributes
(x <- 1:4)
## [1] 1 2 3 4
names(x) <- letters[1:4] # names<- gives each element of the vector a name
x
## a b c d
## 1 2 3 4
c(x) # has names
## a b c d
## 1 2 3 4
as.vector(x) # no names
## [1] 1 2 3 4
dim(x) <- c(2,2)
x
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
c(x)
## [1] 1 2 3 4
as.vector(x)
## [1] 1 2 3 4
## append to a list:
ll <- list(A = 1, c = "C")
ll
## $A
## [1] 1
##
## $c
## [1] "C"
## do *not* use
c(ll, d = 1:3) # which is == c(ll, as.list(c(d = 1:3)))
## $A
## [1] 1
##
## $c
## [1] "C"
##
## $d1
## [1] 1
##
## $d2
## [1] 2
##
## $d3
## [1] 3
## but rather
c(ll, d = list(1:3)) # c() combining two lists
## $A
## [1] 1
##
## $c
## [1] "C"
##
## $d
## [1] 1 2 3
c(list(A = c(B = 1)), recursive = TRUE)
## A.B
## 1
c(options(), recursive = TRUE)
## $add.smooth
## [1] TRUE
##
## $bitmapType
## [1] "cairo"
##
## $browser
## [1] "xdg-open"
##
## $browserNLdisabled
## [1] FALSE
##
## $browserNLdisabled
## [1] FALSE
##
## $callr.condition_handler_cli_message
## function (msg)
## {
## custom_handler <- getOption("cli.default_handler")
## if (is.function(custom_handler)) {
## custom_handler(msg)
## }
## else {
## cli_server_default(msg)
## }
## }
## <bytecode: 0x55d6ebd0b170>
## <environment: namespace:cli>
##
## $CBoundsCheck
## [1] FALSE
##
## $check.bounds
## [1] FALSE
##
## $citation.bibtex.max
## [1] 1
##
## $continue
## [1] "+ "
##
## $contrasts.unordered
## [1] "contr.treatment"
##
## $contrasts.ordered
## [1] "contr.poly"
##
## $cpp11_preserve_xptr
## <pointer: 0x55d6ee9b2ad0>
##
## $cpp11_should_unwind_protect
## [1] TRUE
##
## $defaultPackages1
## [1] "datasets"
##
## $defaultPackages2
## [1] "utils"
##
## $defaultPackages3
## [1] "grDevices"
##
## $defaultPackages4
## [1] "graphics"
##
## $defaultPackages5
## [1] "stats"
##
## $defaultPackages6
## [1] "methods"
##
## $demo.ask
## [1] "default"
##
## $deparse.cutoff
## [1] 60
##
## $device
## function (width = 7, height = 7, ...)
## {
## grDevices::pdf(NULL, width, height, ...)
## }
## <bytecode: 0x55d6ec5acf48>
## <environment: namespace:knitr>
##
## $device.ask.default
## [1] FALSE
##
## $digits
## [1] 7
##
## $dplyr.show_progress
## [1] TRUE
##
## $dvipscmd
## [1] "dvips"
##
## $echo
## [1] FALSE
##
## $editor
## [1] "vi"
##
## $encoding
## [1] "native.enc"
##
## $example.ask
## [1] "default"
##
## $expressions
## [1] 5000
##
## $help.search.types1
## [1] "vignette"
##
## $help.search.types2
## [1] "demo"
##
## $help.search.types3
## [1] "help"
##
## $help.try.all.packages
## [1] FALSE
##
## $htmltools.preserve.raw
## [1] TRUE
##
## $HTTPUserAgent
## [1] "R (4.3.0 x86_64-pc-linux-gnu x86_64 linux-gnu)"
##
## $internet.info
## [1] 2
##
## $keep.parse.data
## [1] TRUE
##
## $keep.parse.data.pkgs
## [1] FALSE
##
## $keep.source
## [1] FALSE
##
## $keep.source.pkgs
## [1] FALSE
##
## $knitr.in.progress
## [1] TRUE
##
## $locatorBell
## [1] TRUE
##
## $mailer
## [1] "mailto"
##
## $matprod
## [1] "default"
##
## $max.contour.segments
## [1] 25000
##
## $max.print
## [1] 99999
##
## $menu.graphics
## [1] TRUE
##
## $na.action
## [1] "na.omit"
##
## $nwarnings
## [1] 50
##
## $OutDec
## [1] "."
##
## $pager
## [1] "/usr/lib/R/bin/pager"
##
## $papersize
## [1] "letter"
##
## $PCRE_limit_recursion
## [1] NA
##
## $PCRE_study
## [1] FALSE
##
## $PCRE_use_JIT
## [1] TRUE
##
## $pdfviewer
## [1] "/usr/bin/xdg-open"
##
## $pkgType
## [1] "source"
##
## $printcmd
## [1] "/usr/bin/lpr"
##
## $prompt
## [1] "> "
##
## $readr.show_progress
## [1] TRUE
##
## $repos.CRAN
## [1] "https://cloud.r-project.org"
##
## $rl_word_breaks
## [1] " \t\n\"\\'`><=%;,|&{()}"
##
## $scipen
## [1] 0
##
## $show.coef.Pvalues
## [1] TRUE
##
## $show.error.messages
## [1] TRUE
##
## $show.signif.stars
## [1] TRUE
##
## $showErrorCalls
## [1] TRUE
##
## $showNCalls
## [1] 50
##
## $showWarnCalls
## [1] FALSE
##
## $str.strict.width
## [1] "no"
##
## $str.digits.d
## [1] 3
##
## $str.vec.len
## [1] 4
##
## $str.list.len
## [1] 99
##
## $str.drop.deparse.attr
## [1] TRUE
##
## $str.formatNum
## function (x, ...)
## format(x, trim = TRUE, drop0trailing = TRUE, ...)
## <environment: 0x55d6ebcfe020>
##
## $str.dendrogram.last
## [1] "`"
##
## $texi2dvi
## [1] "/usr/bin/texi2dvi"
##
## $tikzMetricsDictionary
## [1] "Command-Reference-for-R-1ed-v01-tikzDictionary"
##
## $timeout
## [1] 60
##
## $try.outFile
## [1] 3
##
## $ts.eps
## [1] 1e-05
##
## $ts.S.compat
## [1] FALSE
##
## $unzip
## [1] "/usr/bin/unzip"
##
## $useFancyQuotes
## [1] FALSE
##
## $verbose
## [1] FALSE
##
## $warn
## [1] 0
##
## $warning.length
## [1] 1000
##
## $warnPartialMatchArgs
## [1] TRUE
##
## $warnPartialMatchAttr
## [1] FALSE
##
## $warnPartialMatchDollar
## [1] FALSE
##
## $width
## [1] 80
c(list(A = c(B = 1, C = 2), B = c(E = 7)), recursive = TRUE)
## A.B A.C B.E
## 1 2 7
library(tidyverse)
# Recode carb into a label column.
# Columns are referenced directly (data masking) rather than via `.$carb`:
# `.` is the whole piped-in data frame, so `.$carb` ignores grouping and
# does not work with the native pipe.
mtcars %>%
  mutate(carb_new = case_when(carb == 1 ~ "one",
                              carb == 2 ~ "two",
                              carb == 4 ~ "four",
                              TRUE ~ "other")) %>%
  head(15)
## mpg cyl disp hp drat wt qsec vs am gear carb carb_new
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 four
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 four
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 one
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 one
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 two
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 one
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 four
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 two
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 two
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 four
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 four
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 other
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 other
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 other
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 four
# source: https://www.r-bloggers.com/lesser-known-dplyr-tricks-2/
ceiling takes a single numeric argument x and returns a numeric vector containing the smallest integers not less than the corresponding elements of x.
x = 4.33432 ; ceiling(x)
## [1] 5
x = c(4.3, 5.3, 6.9) ; ceiling(x)
## [1] 5 6 7
choose(52, 5) # how many poker hands are there?
## [1] 2598960
choose(50, 5) * choose(12, 2) # euromillions odds
## [1] 139838160
# Define the colour vector first: without it, `colors` resolves to the
# grDevices::colors function, whose length is 1.
colors <- c("red", "blue", "green", "white")
choose(length(colors), 1) + choose(length(colors), 2) + choose(length(colors), 3) # how many combinations of 1, 2 or 3
## [1] 14
combn(colors, 2) # how many combinations of colours can I have?
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] "red" "red" "red" "blue" "blue" "green"
## [2,] "blue" "green" "white" "green" "white" "white"
combn(colors, 2, FUN = sort) # same but sort alphabetically
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] "blue" "green" "red" "blue" "blue" "green"
## [2,] "red" "red" "white" "green" "white" "white"
expand.grid(colors, colors, colors) # can pick same colour more than once
## Var1 Var2 Var3
## 1 red red red
## 2 blue red red
## 3 green red red
## 4 white red red
## 5 red blue red
## 6 blue blue red
## 7 green blue red
## 8 white blue red
## 9 red green red
## 10 blue green red
## 11 green green red
## 12 white green red
## 13 red white red
## 14 blue white red
## 15 green white red
## 16 white white red
## 17 red red blue
## 18 blue red blue
## 19 green red blue
## 20 white red blue
## 21 red blue blue
## 22 blue blue blue
## 23 green blue blue
## 24 white blue blue
## 25 red green blue
## 26 blue green blue
## 27 green green blue
## 28 white green blue
## 29 red white blue
## 30 blue white blue
## 31 green white blue
## 32 white white blue
## 33 red red green
## 34 blue red green
## 35 green red green
## 36 white red green
## 37 red blue green
## 38 blue blue green
## 39 green blue green
## 40 white blue green
## 41 red green green
## 42 blue green green
## 43 green green green
## 44 white green green
## 45 red white green
## 46 blue white green
## 47 green white green
## 48 white white green
## 49 red red white
## 50 blue red white
## 51 green red white
## 52 white red white
## 53 red blue white
## 54 blue blue white
## 55 green blue white
## 56 white blue white
## 57 red green white
## 58 blue green white
## 59 green green white
## 60 white green white
## 61 red white white
## 62 blue white white
## 63 green white white
## 64 white white white
c(combn(colors, 1, simplify=FALSE), combn(colors, 2, simplify=FALSE), combn(colors, 3, simplify=FALSE))
## [[1]]
## [1] "red"
##
## [[2]]
## [1] "blue"
##
## [[3]]
## [1] "green"
##
## [[4]]
## [1] "white"
##
## [[5]]
## [1] "red" "blue"
##
## [[6]]
## [1] "red" "green"
##
## [[7]]
## [1] "red" "white"
##
## [[8]]
## [1] "blue" "green"
##
## [[9]]
## [1] "blue" "white"
##
## [[10]]
## [1] "green" "white"
##
## [[11]]
## [1] "red" "blue" "green"
##
## [[12]]
## [1] "red" "blue" "white"
##
## [[13]]
## [1] "red" "green" "white"
##
## [[14]]
## [1] "blue" "green" "white"
Examples of counting things.
mtcars %>% dplyr::count(gear)
## gear n
## 1 3 15
## 2 4 12
## 3 5 5
mtcars %>% dplyr::count(gear, cyl)
## gear cyl n
## 1 3 4 1
## 2 3 6 2
## 3 3 8 12
## 4 4 4 8
## 5 4 6 4
## 6 5 4 2
## 7 5 6 1
## 8 5 8 2
mtcars %>% dplyr::count(mpg = cut_width(mpg, 5))
## mpg n
## 1 [7.5,12.5] 2
## 2 (12.5,17.5] 10
## 3 (17.5,22.5] 11
## 4 (22.5,27.5] 5
## 5 (27.5,32.5] 3
## 6 (32.5,37.5] 1
dplyr::count(mtcars, # dataframe
am, # field to group by
wt = mpg) # field to sum
## am n
## 1 0 325.8
## 2 1 317.1
…which is the same as…
mtcars %>% group_by(am) %>% summarise(n = sum(mpg))
## # A tibble: 2 × 2
## am n
## <dbl> <dbl>
## 1 0 326.
## 2 1 317.
(x <- c(2, 4, 6, 8, 10)); cumprod(x)
## [1] 2 4 6 8 10
## [1] 2 8 48 384 3840
(x <- c(2, 4, 6, 8, 10)); cumsum(x)
## [1] 2 4 6 8 10
## [1] 2 6 12 20 30
DF1 = readr::read_csv("ID, event
7T, A
7T, B
7T, A
7T, A
7A, A
7B, B
7B, B
7B, C
7C, A")
## Rows: 9 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ID, event
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DF1$counter = 1
DF1
## # A tibble: 9 × 3
## ID event counter
## <chr> <chr> <dbl>
## 1 7T A 1
## 2 7T B 1
## 3 7T A 1
## 4 7T A 1
## 5 7A A 1
## 6 7B B 1
## 7 7B B 1
## 8 7B C 1
## 9 7C A 1
library(dplyr)
DF2 <- DF1 %>%
group_by(ID) %>%
mutate(cA=cumsum(ifelse(!is.na(event) & event=="A",counter,0))) %>%
mutate(cB=cumsum(ifelse(!is.na(event) & event=="B",counter,0)))
DF2
## # A tibble: 9 × 5
## # Groups: ID [4]
## ID event counter cA cB
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 7T A 1 1 0
## 2 7T B 1 1 1
## 3 7T A 1 2 1
## 4 7T A 1 3 1
## 5 7A A 1 1 0
## 6 7B B 1 0 1
## 7 7B B 1 0 2
## 8 7B C 1 0 2
## 9 7C A 1 1 0
Turn continuous variables into categorical ones. Also known as “binning” #bin #binning #discretize
a <- table(cut(ChickWeight[["weight"]], seq(0, 450, 50)))
a <- as.data.frame(a)
a
## Var1 Freq
## 1 (0,50] 85
## 2 (50,100] 193
## 3 (100,150] 122
## 4 (150,200] 94
## 5 (200,250] 47
## 6 (250,300] 23
## 7 (300,350] 12
## 8 (350,400] 2
## 9 (400,450] 0
table(cut(ChickWeight$weight, seq(0, 450, 50), right = FALSE, labels = c(1:9)))
##
## 1 2 3 4 5 6 7 8 9
## 79 195 124 95 46 25 12 2 0
Binning into named groups.
# binning example
#
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(tidyverse)
#
daysV <- seq(1,100, by = 1) # vector of 1 to 100
daysDF <- as.data.frame(daysV) # convert to dataframe
colnames(daysDF) <- c("days") # give the variable a name
head(daysDF) # have a look
## days
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
# Upper break = max value plus 1, so the last right-open bin includes the max.
# Named day_ceiling so that it does not mask base::ceiling().
day_ceiling <- max(daysDF$days) + 1 ; day_ceiling # calculate max value plus 1
## [1] 101
daybreaks <- c(0, 31, 61, 91, day_ceiling) # N items
daylabels <- c("0-30", "31-60", "61-90", "91+") # N - 1 items
# Convert to a data.table by reference and add the bin label column.
# `days` is referenced directly: inside `[ , j ]` column names are in scope.
data.table::setDT(daysDF)[ , daygroups := base::cut(days,
                                                    breaks = daybreaks,
                                                    right = FALSE,
                                                    labels = daylabels)]
# Sort by the `days` column of the table itself, not the global vector daysV.
daysDF %>% arrange(days) # have a look
## days daygroups
## 1: 1 0-30
## 2: 2 0-30
## 3: 3 0-30
## 4: 4 0-30
## 5: 5 0-30
## 6: 6 0-30
## 7: 7 0-30
## 8: 8 0-30
## 9: 9 0-30
## 10: 10 0-30
## 11: 11 0-30
## 12: 12 0-30
## 13: 13 0-30
## 14: 14 0-30
## 15: 15 0-30
## 16: 16 0-30
## 17: 17 0-30
## 18: 18 0-30
## 19: 19 0-30
## 20: 20 0-30
## 21: 21 0-30
## 22: 22 0-30
## 23: 23 0-30
## 24: 24 0-30
## 25: 25 0-30
## 26: 26 0-30
## 27: 27 0-30
## 28: 28 0-30
## 29: 29 0-30
## 30: 30 0-30
## 31: 31 31-60
## 32: 32 31-60
## 33: 33 31-60
## 34: 34 31-60
## 35: 35 31-60
## 36: 36 31-60
## 37: 37 31-60
## 38: 38 31-60
## 39: 39 31-60
## 40: 40 31-60
## 41: 41 31-60
## 42: 42 31-60
## 43: 43 31-60
## 44: 44 31-60
## 45: 45 31-60
## 46: 46 31-60
## 47: 47 31-60
## 48: 48 31-60
## 49: 49 31-60
## 50: 50 31-60
## 51: 51 31-60
## 52: 52 31-60
## 53: 53 31-60
## 54: 54 31-60
## 55: 55 31-60
## 56: 56 31-60
## 57: 57 31-60
## 58: 58 31-60
## 59: 59 31-60
## 60: 60 31-60
## 61: 61 61-90
## 62: 62 61-90
## 63: 63 61-90
## 64: 64 61-90
## 65: 65 61-90
## 66: 66 61-90
## 67: 67 61-90
## 68: 68 61-90
## 69: 69 61-90
## 70: 70 61-90
## 71: 71 61-90
## 72: 72 61-90
## 73: 73 61-90
## 74: 74 61-90
## 75: 75 61-90
## 76: 76 61-90
## 77: 77 61-90
## 78: 78 61-90
## 79: 79 61-90
## 80: 80 61-90
## 81: 81 61-90
## 82: 82 61-90
## 83: 83 61-90
## 84: 84 61-90
## 85: 85 61-90
## 86: 86 61-90
## 87: 87 61-90
## 88: 88 61-90
## 89: 89 61-90
## 90: 90 61-90
## 91: 91 91+
## 92: 92 91+
## 93: 93 91+
## 94: 94 91+
## 95: 95 91+
## 96: 96 91+
## 97: 97 91+
## 98: 98 91+
## 99: 99 91+
## 100: 100 91+
## days daygroups
# FIN
data.frame(v = 1:4, ch = c("a","B","c","d"), n = 10) #create a data frame
## v ch n
## 1 1 a 10
## 2 2 B 10
## 3 3 c 10
## 4 4 d 10
foo <- list()
foo[[1]] <- data.frame(a=1:5, b=11:15)
foo[[2]] <- data.frame(a=101:105, b=111:115)
foo[[3]] <- data.frame(a=200:210, b=300:310)
df1 <- do.call(rbind, foo)
df1
## a b
## 1 1 11
## 2 2 12
## 3 3 13
## 4 4 14
## 5 5 15
## 6 101 111
## 7 102 112
## 8 103 113
## 9 104 114
## 10 105 115
## 11 200 300
## 12 201 301
## 13 202 302
## 14 203 303
## 15 204 304
## 16 205 305
## 17 206 306
## 18 207 307
## 19 208 308
## 20 209 309
## 21 210 310
Find rows with particular column values duplicated. Keep the duplicates.
# create a dummy data frame
(df <- data.frame(ID = c("userA", "userB", "userA", "userC"),
                  OS = c("Win", "OSX", "Win", "Win64"),
                  time = c("12:22", "23:22", "04:44", "12:28")))
## ID OS time
## 1 userA Win 12:22
## 2 userB OSX 23:22
## 3 userA Win 04:44
## 4 userC Win64 12:28
# Find all records with duplicate IDs.
# %in% is used rather than ==: with more than one duplicated ID, == would
# recycle the right-hand side and compare element by element, returning the
# wrong rows.
ALL_RECORDS <- df[df$ID %in% df$ID[duplicated(df$ID)], ]
print(ALL_RECORDS) # print the duplicate records
## ID OS time
## 1 userA Win 12:22
## 3 userA Win 04:44
Or use a function.
# Flag every element (or data-frame row) of `value` that occurs more than
# once. Scanning from both ends marks the first and last occurrences too,
# not just the repeats.
FuncDuplicates <- function(value) {
  repeated_forward <- duplicated(value)
  repeated_backward <- duplicated(value, fromLast = TRUE)
  repeated_forward | repeated_backward
}
DF_1 <- data.frame(ID=c("userA", "userB", "userA", "userC", "userA"),
OS=c("Win","OSX","Win", "Win64", "Linux"),
time=c("12:22","23:22","04:44","12:28", "13:33"))
DF_1[FuncDuplicates(DF_1$ID),]
## ID OS time
## 1 userA Win 12:22
## 3 userA Win 04:44
## 5 userA Linux 13:33
Duplicates across two columns (method 1).
# Duplicates across two columns (method 1).
df = structure(list(a = c(1, 2, 3, 4, 5, 6, 7, 8), b = c(2, 4, 6,
8, 10, 12, 13, 14), c = structure(c(1L, 2L, 2L, 3L, 4L, 4L, 5L,
5L), .Label = c("A", "B", "C", "D", "E"), class = "factor"),
d = c(1001, 1002, 1002, 1003, 1004, 1004, 1005, 1006)), .Names = c("a",
"b", "c", "d"), row.names = c(NA, -8L), class = "data.frame")
df # show the data frame
## a b c d
## 1 1 2 A 1001
## 2 2 4 B 1002
## 3 3 6 B 1002
## 4 4 8 C 1003
## 5 5 10 D 1004
## 6 6 12 D 1004
## 7 7 13 E 1005
## 8 8 14 E 1006
# The formula is NOT(duplicated OR duplicated). The first duplicated does not
# identify the first occurrences of duplicated values and the second duplicated
# does not identify the last occurrences of duplicated values.
# Together, they identify all duplicated values.
# Not duplicated
df[!(duplicated(df[c("c","d")]) | duplicated(df[c("c","d")], fromLast = TRUE)), ]
## a b c d
## 1 1 2 A 1001
## 4 4 8 C 1003
## 7 7 13 E 1005
## 8 8 14 E 1006
# This next line does IS(duplicated OR duplicated) by removing the !
# Duplicated
df[(duplicated(df[c("c","d")]) | duplicated(df[c("c","d")], fromLast = TRUE)), ]
## a b c d
## 2 2 4 B 1002
## 3 3 6 B 1002
## 5 5 10 D 1004
## 6 6 12 D 1004
Duplicates across two columns (method 2).
# Flag every element (or data-frame row) that occurs more than once,
# including its first and last occurrence.
FuncDuplicates <- function (value)
{
  duplicated(value) | duplicated(value, fromLast = TRUE)
}
(DF_1 <- data.frame(ID=c("userA", "userB", "userA", "userC", "userA", "userA"),
                    OS=c("Win","OSX","Win", "Win64", "Linux", "OSX"),
                    time=c("12:22","23:22","04:44","12:28", "13:33", "12:22")))
## ID OS time
## 1 userA Win 12:22
## 2 userB OSX 23:22
## 3 userA Win 04:44
## 4 userC Win64 12:28
## 5 userA Linux 13:33
## 6 userA OSX 12:22
DF_1[FuncDuplicates(DF_1[1:2]),] # duplicate ID and OS
## ID OS time
## 1 userA Win 12:22
## 3 userA Win 04:44
# Non-adjacent columns need c(1, 3). DF_1[1, 3] would select the single cell
# at row 1, column 3, which can never be a duplicate and so returns 0 rows.
DF_1[FuncDuplicates(DF_1[c(1, 3)]),] # duplicate ID and time
## ID OS time
## 1 userA Win 12:22
## 6 userA OSX 12:22
# Alternative: copy the columns so they sit side by side, then use a range.
DF_1$time2 <- DF_1$time # create a new column
DF_1$ID2 <- DF_1$ID # create another new column beside it
DF_1[FuncDuplicates(DF_1[4:5]),] # duplicate ID2 and time2, this works
## ID OS time time2 ID2
## 1 userA Win 12:22 12:22 userA
## 6 userA OSX 12:22 12:22 userA
expand.grid(c("H","T"), 1:6) # coin and dice combination outcomes
## Var1 Var2
## 1 H 1
## 2 T 1
## 3 H 2
## 4 T 2
## 5 H 3
## 6 T 3
## 7 H 4
## 8 T 4
## 9 H 5
## 10 T 5
## 11 H 6
## 12 T 6
colors <- c("red", "blue", "green", "white")
expand.grid(colors, colors, colors) # combinations with reuse
## Var1 Var2 Var3
## 1 red red red
## 2 blue red red
## 3 green red red
## 4 white red red
## 5 red blue red
## 6 blue blue red
## 7 green blue red
## 8 white blue red
## 9 red green red
## 10 blue green red
## 11 green green red
## 12 white green red
## 13 red white red
## 14 blue white red
## 15 green white red
## 16 white white red
## 17 red red blue
## 18 blue red blue
## 19 green red blue
## 20 white red blue
## 21 red blue blue
## 22 blue blue blue
## 23 green blue blue
## 24 white blue blue
## 25 red green blue
## 26 blue green blue
## 27 green green blue
## 28 white green blue
## 29 red white blue
## 30 blue white blue
## 31 green white blue
## 32 white white blue
## 33 red red green
## 34 blue red green
## 35 green red green
## 36 white red green
## 37 red blue green
## 38 blue blue green
## 39 green blue green
## 40 white blue green
## 41 red green green
## 42 blue green green
## 43 green green green
## 44 white green green
## 45 red white green
## 46 blue white green
## 47 green white green
## 48 white white green
## 49 red red white
## 50 blue red white
## 51 green red white
## 52 white red white
## 53 red blue white
## 54 blue blue white
## 55 green blue white
## 56 white blue white
## 57 red green white
## 58 blue green white
## 59 green green white
## 60 white green white
## 61 red white white
## 62 blue white white
## 63 green white white
## 64 white white white
Fill a row with values from above or below.
ID <- 1:3
Name <- c("Tom", NA, "Harry")
(DF0 <- tibble::tibble(ID, Name)) # create and show data frame
## # A tibble: 3 × 2
## ID Name
## <int> <chr>
## 1 1 Tom
## 2 2 <NA>
## 3 3 Harry
library(tidyr)
(DF1 <- DF0 %>%
tidyr::fill(Name, .direction = "down"))
## # A tibble: 3 × 2
## ID Name
## <int> <chr>
## 1 1 Tom
## 2 2 Tom
## 3 3 Harry
(DF2 <- DF0 %>%
tidyr::fill(Name, .direction = "up"))
## # A tibble: 3 × 2
## ID Name
## <int> <chr>
## 1 1 Tom
## 2 2 Harry
## 3 3 Harry
Bit of a contrived example but … filter for rows where all three variables have a value of 1.
# Keep rows where vs, am and carb are all equal to 1.
# if_all() replaces the superseded filter_at()/all_vars() pair
# (use if_any() to match rows where at least one column equals 1).
mtcars %>%
  tibble::rownames_to_column(var = "car_name") %>%
  select(car_name, vs, am, carb) %>%
  filter(if_all(c(vs, am, carb), ~ .x == 1))
## car_name vs am carb
## 1 Datsun 710 1 1 1
## 2 Fiat 128 1 1 1
## 3 Toyota Corolla 1 1 1
## 4 Fiat X1-9 1 1 1
floor takes a single numeric argument x and returns a numeric vector containing the largest integers not greater than the corresponding elements of x.
x <- c(-1.2, 0, 4.1, 4.9, 5.0)
floor(x)
## [1] -2 0 4 4 5
a <- data.table::data.table(c(1,2,3))
b <- data.table::data.table(c(2,3,4))
data.table::fsetdiff(a,b)
## V1
## 1: 1
data.table::fsetdiff(b,a)
## V1
## 1: 4
# simple gather example
suppressPackageStartupMessages(library(tidyr))
# create simple data frame
DF0 <- read.table(header = TRUE, text = "
name Age Salary Sex
Amy 21 21000 F
Bill 32 32000 M
Cathy 41 41000 F
")
DF0 # look at the data frame
## name Age Salary Sex
## 1 Amy 21 21000 F
## 2 Bill 32 32000 M
## 3 Cathy 41 41000 F
# gather it
(DF1 <- tidyr::gather(DF0, variable, value, -name))
## name variable value
## 1 Amy Age 21
## 2 Bill Age 32
## 3 Cathy Age 41
## 4 Amy Salary 21000
## 5 Bill Salary 32000
## 6 Cathy Salary 41000
## 7 Amy Sex F
## 8 Bill Sex M
## 9 Cathy Sex F
# another example
DF2 <- read.table(header = TRUE, text = "
name team YR2014 YR2015 YR2016
Amy team_Y 1 2 4
Arnold team_Y 4 4 9
Annmarie team_X 6 9 2
Benny team_X 3 3 4
Belinda team_Y 4 5 3
Benjy team_Y 6 2 3
Beth team_X 1 2 4")
DF2 # look at the data
## name team YR2014 YR2015 YR2016
## 1 Amy team_Y 1 2 4
## 2 Arnold team_Y 4 4 9
## 3 Annmarie team_X 6 9 2
## 4 Benny team_X 3 3 4
## 5 Belinda team_Y 4 5 3
## 6 Benjy team_Y 6 2 3
## 7 Beth team_X 1 2 4
DF3 <- tidyr::gather(DF2, year, number_of_medals, -name, -team)
DF3 # look at the data
## name team year number_of_medals
## 1 Amy team_Y YR2014 1
## 2 Arnold team_Y YR2014 4
## 3 Annmarie team_X YR2014 6
## 4 Benny team_X YR2014 3
## 5 Belinda team_Y YR2014 4
## 6 Benjy team_Y YR2014 6
## 7 Beth team_X YR2014 1
## 8 Amy team_Y YR2015 2
## 9 Arnold team_Y YR2015 4
## 10 Annmarie team_X YR2015 9
## 11 Benny team_X YR2015 3
## 12 Belinda team_Y YR2015 5
## 13 Benjy team_Y YR2015 2
## 14 Beth team_X YR2015 2
## 15 Amy team_Y YR2016 4
## 16 Arnold team_Y YR2016 9
## 17 Annmarie team_X YR2016 2
## 18 Benny team_X YR2016 4
## 19 Belinda team_Y YR2016 3
## 20 Benjy team_Y YR2016 3
## 21 Beth team_X YR2016 4
Write and read SAS format files.
# write_sas() is deprecated (haven >= 2.5.2); the supported way to exchange
# data with SAS is the transport (XPT) format.
file1 <- "mtcars.xpt"
haven::write_xpt(mtcars, file1)
DF_1 <- haven::read_xpt(file1)
head(DF_1,3)
## # A tibble: 3 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
## 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
## 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
a = 1
a %in% c(1,2,3)
## [1] TRUE
b = 4
b %in% c(1,2,3)
## [1] FALSE
a <- 1 ; b <- 2
identical(a, b)
## [1] FALSE
c <- 3 ; d <- 3
identical(c, d)
## [1] TRUE
x <- c(6:-4); sqrt(x) #- gives warning
## Warning in sqrt(x): NaNs produced
## [1] 2.449490 2.236068 2.000000 1.732051 1.414214 1.000000 0.000000 NaN
## [9] NaN NaN NaN
sqrt(ifelse(x >= 0, x, NA)) # no warning
## [1] 2.449490 2.236068 2.000000 1.732051 1.414214 1.000000 0.000000 NA
## [9] NA NA NA
# Element-wise: where x is smaller than y return x squared, otherwise the
# negated square of y.
foo <- function(x, y) {
  x_is_smaller <- x < y
  ifelse(test = x_is_smaller, yes = x * x, no = -(y * y))
}
foo(1:5, 5:1)
## [1] 1 4 -9 -4 -1
# ifelse() also works cell by cell on a matrix
(a <- matrix(1:9, nrow = 3, ncol = 3))
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
# keep the even cells, replace the odd ones with 0
ifelse(test = a %% 2 == 0, yes = a, no = 0)
## [,1] [,2] [,3]
## [1,] 0 4 0
## [2,] 2 0 8
## [3,] 0 6 0
# conditional assignment: pick the value of x with an if / else statement
a <- 5
if (a == 5) {
  x <- 1
} else {
  x <- 2
}
x
## [1] 1
# intersect(): the elements two vectors have in common
x <- c(1, 2, 3)
y <- c(2, 3, 4)
intersect(x, y)
## [1] 2 3
x <- c(1, 2, 3)
y <- c(2, 3, 4)
intersect(y, x)
## [1] 2 3
The code below checks whether each package in the vector is installed, installs any that are missing, and then loads it.
# is_installed(): is each named package installed?
#   mypkg   character vector of package names
#   returns a logical vector with one element per name
# installed.packages() reads the metadata of every installed package and its
# help page advises against using it for this check; system.file() returns ""
# when a package cannot be found, which is all that is needed here.
is_installed <- function(mypkg) {
  vapply(
    as.character(mypkg),
    function(pkg) {
      !is.na(pkg) && nzchar(pkg) && nzchar(system.file(package = pkg))
    },
    logical(1),
    USE.NAMES = FALSE
  )
}

# load_or_install(): attach every package in package_names, installing any
# that are missing first.
#   package_names  character vector of package names
#   repos          CRAN mirror used for missing packages (HTTPS by default)
# Called for its side effects; nothing useful is returned.
load_or_install <- function(package_names,
                            repos = "https://cloud.r-project.org") {
  for (package_name in package_names) {
    if (!is_installed(package_name)) {
      install.packages(package_name, repos = repos)
    }
    # character.only = TRUE because the name is held in a variable
    library(package_name, character.only = TRUE, quietly = TRUE, verbose = FALSE)
  }
}
load_or_install(c("haven","ggplot2")) # vector of packages to load; any that are missing are installed first
# is.element(x, table): is x one of the values in table?
a <- c(1,2,3)
b <- 2
is.element(b, a) # same as b %in% a
## [1] TRUE
# J1 holds a JSON array of two records; jsonlite::fromJSON() parses it and the
# result prints as a data frame with one row per record
J1 <- '[
{
"year": "2016",
"student": "student_one",
"mathematics": 7,
"english": 8,
"biology": 5
},
{
"year": "2016",
"student": "student_two",
"mathematics": 3,
"english": 9,
"biology": 7
}
]'
(students <- jsonlite::fromJSON(txt=J1))
## year student mathematics english biology
## 1 2016 student_one 7 8 5
## 2 2016 student_two 3 9 7
nycflights13::flights %>%
keep(is.character) %>%
head()
## # A tibble: 6 × 4
## carrier tailnum origin dest
## <chr> <chr> <chr> <chr>
## 1 UA N14228 EWR IAH
## 2 UA N24211 LGA IAH
## 3 AA N619AA JFK MIA
## 4 B6 N804JB JFK BQN
## 5 DL N668DN LGA ATL
## 6 UA N39463 EWR ORD
nycflights13::flights %>%
keep(is.integer) %>%
head()
## # A tibble: 6 × 8
## year month day dep_time sched_dep_time arr_time sched_arr_time flight
## <int> <int> <int> <int> <int> <int> <int> <int>
## 1 2013 1 1 517 515 830 819 1545
## 2 2013 1 1 533 529 850 830 1714
## 3 2013 1 1 542 540 923 850 1141
## 4 2013 1 1 544 545 1004 1022 725
## 5 2013 1 1 554 600 812 837 461
## 6 2013 1 1 554 558 740 728 1696
How long is something?
# length() counts the elements of a vector
x <- seq_len(6)
length(x)
## [1] 6
length(1:5)
## [1] 5
length(2315:4567)
## [1] 2253
length(letters)
## [1] 26
letters # lower case letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
## [20] "t" "u" "v" "w" "x" "y" "z"
LETTERS # upper case letters
## [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
## [20] "T" "U" "V" "W" "X" "Y" "Z"
# gl(): 3 levels, each repeated 3 times, 9 values in total
(x <- gl(n = 3, k = 3, length = 9)) # generate factor levels
## [1] 1 1 1 2 2 2 3 3 3
## Levels: 1 2 3
levels(x)
## [1] "1" "2" "3"
Change a column’s name.
# small example data frame
(DF_1 <- data.frame(
  Name = c("Pat", "Jim", "Jane", "Mary"),
  Age = c(12, 23, 34, 45),
  time = c("12:22", "23:22", "04:44", "12:28")
))
## Name Age time
## 1 Pat 12 12:22
## 2 Jim 23 23:22
## 3 Jane 34 04:44
## 4 Mary 45 12:28
# find the column called "Name" and give it the new name "First_Name"
colnames(DF_1)[colnames(DF_1) == "Name"] <- "First_Name" # From / To
DF_1
## First_Name Age time
## 1 Pat 12 12:22
## 2 Jim 23 23:22
## 3 Jane 34 04:44
## 4 Mary 45 12:28
ncol(mtcars) # the number of columns.
## [1] 11
nrow(mtcars) # the number of rows.
## [1] 32
Count distinct / unique values in a vector
vec <- c(1,2,3,4,1,2,3,4,1,2,3,4,5)
dplyr::n_distinct(vec)
## [1] 5
# cars with mpg within one standard deviation of 20
mtcars %>%
tibble::rownames_to_column(var="car_name") %>%
select(car_name, mpg) %>%
filter(near(mpg, 20, tol = sd(mpg)))
## car_name mpg
## 1 Mazda RX4 21.0
## 2 Mazda RX4 Wag 21.0
## 3 Datsun 710 22.8
## 4 Hornet 4 Drive 21.4
## 5 Hornet Sportabout 18.7
## 6 Valiant 18.1
## 7 Duster 360 14.3
## 8 Merc 240D 24.4
## 9 Merc 230 22.8
## 10 Merc 280 19.2
## 11 Merc 280C 17.8
## 12 Merc 450SE 16.4
## 13 Merc 450SL 17.3
## 14 Merc 450SLC 15.2
## 15 Chrysler Imperial 14.7
## 16 Toyota Corona 21.5
## 17 Dodge Challenger 15.5
## 18 AMC Javelin 15.2
## 19 Pontiac Firebird 19.2
## 20 Porsche 914-2 26.0
## 21 Ford Pantera L 15.8
## 22 Ferrari Dino 19.7
## 23 Maserati Bora 15.0
## 24 Volvo 142E 21.4
Categorize a vector or data frame into n-tiles where you choose n.
library(tidyverse)
(df0 <- data.frame(V1 = 13:24)) # create data frame of 12 numbers
## V1
## 1 13
## 2 14
## 3 15
## 4 16
## 5 17
## 6 18
## 7 19
## 8 20
## 9 21
## 10 22
## 11 23
## 12 24
df0 %>% mutate(ntile = ntile(V1, 4)) # categorize into 4
## V1 ntile
## 1 13 1
## 2 14 1
## 3 15 1
## 4 16 2
## 5 17 2
## 6 18 2
## 7 19 3
## 8 20 3
## 9 21 3
## 10 22 4
## 11 23 4
## 12 24 4
df0 %>% mutate(ntile = ntile(V1, 3)) # categorize into 3
## V1 ntile
## 1 13 1
## 2 14 1
## 3 15 1
## 4 16 1
## 5 17 2
## 6 18 2
## 7 19 2
## 8 20 2
## 9 21 3
## 10 22 3
## 11 23 3
## 12 24 3
# outer() applies a function to every pairing of its two inputs
outer(1:5, 1:5, FUN = "*") # 5 x 5 multiplication table
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 2 3 4 5
## [2,] 2 4 6 8 10
## [3,] 3 6 9 12 15
## [4,] 4 8 12 16 20
## [5,] 5 10 15 20 25
values <- c("A", 2:10, "J", "Q", "K")
suits <- c("D", "C", "S", "H")
# glue every card value onto every suit
outer(values, suits, FUN = paste0) # Generate Deck of Cards
## [,1] [,2] [,3] [,4]
## [1,] "AD" "AC" "AS" "AH"
## [2,] "2D" "2C" "2S" "2H"
## [3,] "3D" "3C" "3S" "3H"
## [4,] "4D" "4C" "4S" "4H"
## [5,] "5D" "5C" "5S" "5H"
## [6,] "6D" "6C" "6S" "6H"
## [7,] "7D" "7C" "7S" "7H"
## [8,] "8D" "8C" "8S" "8H"
## [9,] "9D" "9C" "9S" "9H"
## [10,] "10D" "10C" "10S" "10H"
## [11,] "JD" "JC" "JS" "JH"
## [12,] "QD" "QC" "QS" "QH"
## [13,] "KD" "KC" "KS" "KH"
Create a scatterplot matrix.
# scatterplot matrix of four mtcars columns
pairs(mtcars[c("mpg","wt","cyl","disp")])
# the four iris measurements; each point's fill colour (bg) is looked up from
# its Species, so the three species are drawn in red, green3 and blue
pairs(iris[1:4], main = "Anderson's Iris Data -- 3 species",
pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])
# the same data and arguments passed to psych::pairs.panels() instead
psych::pairs.panels(iris[1:4], main = "Anderson's Iris Data -- 3 species",
pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])
(a <- c("xxx234,334", "22aa22", "a11","11a"))
## [1] "xxx234,334" "22aa22" "a11" "11a"
(b <- readr::parse_number(a))
## [1] 234334 22 11 11
# prod(): multiply all the elements of a vector together
(x <- 2 * (1:5))
## [1] 2 4 6 8 10
prod(x)
## [1] 3840
Pull out a single variable.
mtcars %>% dplyr::pull(mpg)
## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4
mtcars %>% dplyr::pull(2)
## [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
# range() returns the smallest and largest value; x is a fresh random sample,
# so the numbers printed below change on every run unless set.seed() is called first
x <- rnorm(100) ; plot(x) ; range(x)
## [1] -2.313863 2.875106
# rep(): repeat a value or a whole vector
rep("a", times = 10)
## [1] "a" "a" "a" "a" "a" "a" "a" "a" "a" "a"
rep(c(1, 2, 3), times = 2)
## [1] 1 2 3 1 2 3
rep(c(1, 2, 3), times = 3)
## [1] 1 2 3 1 2 3 1 2 3
# each = repeats every element in place rather than the vector as a whole
rep(c(1, 2, 3), each = 2)
## [1] 1 1 2 2 3 3
rep(6:10)
## [1] 6 7 8 9 10
rep(6:10, times = 1:5) # repeat 6 to 10, 1 to 5 times respectively
## [1] 6 7 7 8 8 8 9 9 9 9 10 10 10 10 10
Reverse elements.
V1 <- c(1, 2, 3)
rev(V1)
## [1] 3 2 1
# reversing twice gives back the original order
rev(rev(V1))
## [1] 1 2 3
# rle(): run length encoding, i.e. each value with the length of its run
(x <- rep(10:6, times = 5:1))
## [1] 10 10 10 10 10 9 9 9 9 8 8 8 7 7 6
rle(x)
## Run Length Encoding
## lengths: int [1:5] 5 4 3 2 1
## values : int [1:5] 10 9 8 7 6
# a logical vector built from its runs: 2 TRUE, 2 FALSE, 1 TRUE, 1 FALSE, 3 TRUE
z <- rep(c(TRUE, FALSE, TRUE, FALSE, TRUE), times = c(2, 2, 1, 1, 3))
rle(z)
## Run Length Encoding
## lengths: int [1:5] 2 2 1 1 3
## values : logi [1:5] TRUE FALSE TRUE FALSE TRUE
rle(as.character(z))
## Run Length Encoding
## lengths: int [1:5] 2 2 1 1 3
## values : chr [1:5] "TRUE" "FALSE" "TRUE" "FALSE" "TRUE"
# prefix is put in front of every printed line
print(rle(z), prefix = "..| ")
## ..| Run Length Encoding
## ..| lengths: int [1:5] 2 2 1 1 3
## ..| values : logi [1:5] TRUE FALSE TRUE FALSE TRUE
round rounds the values in its first argument to the specified number of decimal places (default 0).
x <- c(-1.1234, 1.1234)
round(x)
## [1] -1 1
round(x, digits = 1)
## [1] -1.1 1.1
round(x, digits = 2)
## [1] -1.12 1.12
# find the length of each mtcars column (the number of observations per variable):
sapply(mtcars, length)
## mpg cyl disp hp drat wt qsec vs am gear carb
## 32 32 32 32 32 32 32 32 32 32 32
# find the sums of mtcars observations:
sapply(mtcars, sum)
## mpg cyl disp hp drat wt qsec vs
## 642.900 198.000 7383.100 4694.000 115.090 102.952 571.160 14.000
## am gear carb
## 13.000 118.000 90.000
# find the quantiles
sapply(mtcars, quantile)
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0% 10.400 4 71.100 52.0 2.760 1.51300 14.5000 0 0 3 1
## 25% 15.425 4 120.825 96.5 3.080 2.58125 16.8925 0 0 3 2
## 50% 19.200 6 196.300 123.0 3.695 3.32500 17.7100 0 0 4 2
## 75% 22.800 8 326.000 180.0 3.920 3.61000 18.9000 1 1 4 4
## 100% 33.900 8 472.000 335.0 4.930 5.42400 22.9000 1 1 5 8
# Find the classes
sapply(mtcars, class)
## mpg cyl disp hp drat wt qsec vs
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## am gear carb
## "numeric" "numeric" "numeric"
# Define a custom function (log10 of the input, plus 1) and apply it to every
# column of mtcars; columns containing 0 produce -Inf
DerivativeFunction <- function(x) {
  1 + log10(x)
}
sapply(mtcars, DerivativeFunction)
## mpg cyl disp hp drat wt qsec vs am
## [1,] 2.322219 1.778151 3.204120 3.041393 1.591065 1.418301 2.216430 -Inf 1
## [2,] 2.322219 1.778151 3.204120 3.041393 1.591065 1.458638 2.230960 -Inf 1
## [3,] 2.357935 1.602060 3.033424 2.968483 1.585461 1.365488 2.269746 1 1
## [4,] 2.330414 1.778151 3.411620 3.041393 1.488551 1.507181 2.288696 1 -Inf
## [5,] 2.271842 1.903090 3.556303 3.243038 1.498311 1.536558 2.230960 -Inf -Inf
## [6,] 2.257679 1.778151 3.352183 3.021189 1.440909 1.539076 2.305781 1 -Inf
## [7,] 2.155336 1.903090 3.556303 3.389166 1.506505 1.552668 2.199755 -Inf -Inf
## [8,] 2.387390 1.602060 3.166430 2.792392 1.567026 1.503791 2.301030 1 -Inf
## [9,] 2.357935 1.602060 3.148603 2.977724 1.593286 1.498311 2.359835 1 -Inf
## [10,] 2.283301 1.778151 3.224274 3.089905 1.593286 1.536558 2.262451 1 -Inf
## [11,] 2.250420 1.778151 3.224274 3.089905 1.593286 1.536558 2.276462 1 -Inf
## [12,] 2.214844 1.903090 3.440594 3.255273 1.487138 1.609594 2.240549 -Inf -Inf
## [13,] 2.238046 1.903090 3.440594 3.255273 1.487138 1.571709 2.245513 -Inf -Inf
## [14,] 2.181844 1.903090 3.440594 3.255273 1.487138 1.577492 2.255273 -Inf -Inf
## [15,] 2.017033 1.903090 3.673942 3.311754 1.466868 1.720159 2.254790 -Inf -Inf
## [16,] 2.017033 1.903090 3.662758 3.332438 1.477121 1.734320 2.250908 -Inf -Inf
## [17,] 2.167317 1.903090 3.643453 3.361728 1.509203 1.727948 2.241048 -Inf -Inf
## [18,] 2.510545 1.602060 2.895975 2.819544 1.610660 1.342423 2.289366 1 1
## [19,] 2.482874 1.602060 2.879096 2.716003 1.692847 1.208173 2.267641 1 1
## [20,] 2.530200 1.602060 2.851870 2.812913 1.625312 1.263636 2.298853 1 1
## [21,] 2.332438 1.602060 3.079543 2.986772 1.568202 1.391817 2.301247 1 -Inf
## [22,] 2.190332 1.903090 3.502427 3.176091 1.440909 1.546543 2.227115 -Inf -Inf
## [23,] 2.181844 1.903090 3.482874 3.176091 1.498311 1.535927 2.238046 -Inf -Inf
## [24,] 2.123852 1.903090 3.544068 3.389166 1.571709 1.584331 2.187803 -Inf -Inf
## [25,] 2.283301 1.903090 3.602060 3.243038 1.488551 1.584896 2.231724 -Inf -Inf
## [26,] 2.436163 1.602060 2.897627 2.819544 1.610660 1.286681 2.276462 1 1
## [27,] 2.414973 1.602060 3.080266 2.959041 1.646404 1.330414 2.222716 -Inf 1
## [28,] 2.482874 1.602060 2.978181 3.053078 1.576341 1.179839 2.227887 1 1
## [29,] 2.198657 1.903090 3.545307 3.421604 1.625312 1.501059 2.161368 -Inf 1
## [30,] 2.294466 1.778151 3.161368 3.243038 1.558709 1.442480 2.190332 -Inf 1
## [31,] 2.176091 1.903090 3.478566 3.525045 1.549003 1.552668 2.164353 -Inf 1
## [32,] 2.330414 1.602060 3.082785 3.037426 1.613842 1.444045 2.269513 1 1
## gear carb
## [1,] 1.602060 1.602060
## [2,] 1.602060 1.602060
## [3,] 1.602060 1.000000
## [4,] 1.477121 1.000000
## [5,] 1.477121 1.301030
## [6,] 1.477121 1.000000
## [7,] 1.477121 1.602060
## [8,] 1.602060 1.301030
## [9,] 1.602060 1.301030
## [10,] 1.602060 1.602060
## [11,] 1.602060 1.602060
## [12,] 1.477121 1.477121
## [13,] 1.477121 1.477121
## [14,] 1.477121 1.477121
## [15,] 1.477121 1.602060
## [16,] 1.477121 1.602060
## [17,] 1.477121 1.602060
## [18,] 1.602060 1.000000
## [19,] 1.602060 1.301030
## [20,] 1.602060 1.000000
## [21,] 1.477121 1.000000
## [22,] 1.477121 1.301030
## [23,] 1.477121 1.301030
## [24,] 1.477121 1.602060
## [25,] 1.477121 1.301030
## [26,] 1.602060 1.000000
## [27,] 1.698970 1.301030
## [28,] 1.698970 1.301030
## [29,] 1.698970 1.602060
## [30,] 1.698970 1.778151
## [31,] 1.698970 1.903090
## [32,] 1.602060 1.301030
# The same log10(x) + 1 transformation, written as an anonymous function
# directly inside the sapply() call instead of being defined first.
sapply(mtcars, function(x) log10(x) + 1)
## mpg cyl disp hp drat wt qsec vs am
## [1,] 2.322219 1.778151 3.204120 3.041393 1.591065 1.418301 2.216430 -Inf 1
## [2,] 2.322219 1.778151 3.204120 3.041393 1.591065 1.458638 2.230960 -Inf 1
## [3,] 2.357935 1.602060 3.033424 2.968483 1.585461 1.365488 2.269746 1 1
## [4,] 2.330414 1.778151 3.411620 3.041393 1.488551 1.507181 2.288696 1 -Inf
## [5,] 2.271842 1.903090 3.556303 3.243038 1.498311 1.536558 2.230960 -Inf -Inf
## [6,] 2.257679 1.778151 3.352183 3.021189 1.440909 1.539076 2.305781 1 -Inf
## [7,] 2.155336 1.903090 3.556303 3.389166 1.506505 1.552668 2.199755 -Inf -Inf
## [8,] 2.387390 1.602060 3.166430 2.792392 1.567026 1.503791 2.301030 1 -Inf
## [9,] 2.357935 1.602060 3.148603 2.977724 1.593286 1.498311 2.359835 1 -Inf
## [10,] 2.283301 1.778151 3.224274 3.089905 1.593286 1.536558 2.262451 1 -Inf
## [11,] 2.250420 1.778151 3.224274 3.089905 1.593286 1.536558 2.276462 1 -Inf
## [12,] 2.214844 1.903090 3.440594 3.255273 1.487138 1.609594 2.240549 -Inf -Inf
## [13,] 2.238046 1.903090 3.440594 3.255273 1.487138 1.571709 2.245513 -Inf -Inf
## [14,] 2.181844 1.903090 3.440594 3.255273 1.487138 1.577492 2.255273 -Inf -Inf
## [15,] 2.017033 1.903090 3.673942 3.311754 1.466868 1.720159 2.254790 -Inf -Inf
## [16,] 2.017033 1.903090 3.662758 3.332438 1.477121 1.734320 2.250908 -Inf -Inf
## [17,] 2.167317 1.903090 3.643453 3.361728 1.509203 1.727948 2.241048 -Inf -Inf
## [18,] 2.510545 1.602060 2.895975 2.819544 1.610660 1.342423 2.289366 1 1
## [19,] 2.482874 1.602060 2.879096 2.716003 1.692847 1.208173 2.267641 1 1
## [20,] 2.530200 1.602060 2.851870 2.812913 1.625312 1.263636 2.298853 1 1
## [21,] 2.332438 1.602060 3.079543 2.986772 1.568202 1.391817 2.301247 1 -Inf
## [22,] 2.190332 1.903090 3.502427 3.176091 1.440909 1.546543 2.227115 -Inf -Inf
## [23,] 2.181844 1.903090 3.482874 3.176091 1.498311 1.535927 2.238046 -Inf -Inf
## [24,] 2.123852 1.903090 3.544068 3.389166 1.571709 1.584331 2.187803 -Inf -Inf
## [25,] 2.283301 1.903090 3.602060 3.243038 1.488551 1.584896 2.231724 -Inf -Inf
## [26,] 2.436163 1.602060 2.897627 2.819544 1.610660 1.286681 2.276462 1 1
## [27,] 2.414973 1.602060 3.080266 2.959041 1.646404 1.330414 2.222716 -Inf 1
## [28,] 2.482874 1.602060 2.978181 3.053078 1.576341 1.179839 2.227887 1 1
## [29,] 2.198657 1.903090 3.545307 3.421604 1.625312 1.501059 2.161368 -Inf 1
## [30,] 2.294466 1.778151 3.161368 3.243038 1.558709 1.442480 2.190332 -Inf 1
## [31,] 2.176091 1.903090 3.478566 3.525045 1.549003 1.552668 2.164353 -Inf 1
## [32,] 2.330414 1.602060 3.082785 3.037426 1.613842 1.444045 2.269513 1 1
## gear carb
## [1,] 1.602060 1.602060
## [2,] 1.602060 1.602060
## [3,] 1.602060 1.000000
## [4,] 1.477121 1.000000
## [5,] 1.477121 1.301030
## [6,] 1.477121 1.000000
## [7,] 1.477121 1.602060
## [8,] 1.602060 1.301030
## [9,] 1.602060 1.301030
## [10,] 1.602060 1.602060
## [11,] 1.602060 1.602060
## [12,] 1.477121 1.477121
## [13,] 1.477121 1.477121
## [14,] 1.477121 1.477121
## [15,] 1.477121 1.602060
## [16,] 1.477121 1.602060
## [17,] 1.477121 1.602060
## [18,] 1.602060 1.000000
## [19,] 1.602060 1.301030
## [20,] 1.602060 1.000000
## [21,] 1.477121 1.000000
## [22,] 1.477121 1.301030
## [23,] 1.477121 1.301030
## [24,] 1.477121 1.602060
## [25,] 1.477121 1.301030
## [26,] 1.602060 1.000000
## [27,] 1.698970 1.301030
## [28,] 1.698970 1.301030
## [29,] 1.698970 1.602060
## [30,] 1.698970 1.778151
## [31,] 1.698970 1.903090
## [32,] 1.602060 1.301030
# Find the range
sapply(mtcars, range)
## mpg cyl disp hp drat wt qsec vs am gear carb
## [1,] 10.4 4 71.1 52 2.76 1.513 14.5 0 0 3 1
## [2,] 33.9 8 472.0 335 4.93 5.424 22.9 1 1 5 8
# Print mtcars: print() is called on each column in turn (the vectors shown
# first), and the value sapply() returns is then printed as a matrix
sapply(mtcars, print)
## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4
## [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
## [1] 160.0 160.0 108.0 258.0 360.0 225.0 360.0 146.7 140.8 167.6 167.6 275.8
## [13] 275.8 275.8 472.0 460.0 440.0 78.7 75.7 71.1 120.1 318.0 304.0 350.0
## [25] 400.0 79.0 120.3 95.1 351.0 145.0 301.0 121.0
## [1] 110 110 93 110 175 105 245 62 95 123 123 180 180 180 205 215 230 66 52
## [20] 65 97 150 150 245 175 66 91 113 264 175 335 109
## [1] 3.90 3.90 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 3.92 3.07 3.07 3.07 2.93
## [16] 3.00 3.23 4.08 4.93 4.22 3.70 2.76 3.15 3.73 3.08 4.08 4.43 3.77 4.22 3.62
## [31] 3.54 4.11
## [1] 2.620 2.875 2.320 3.215 3.440 3.460 3.570 3.190 3.150 3.440 3.440 4.070
## [13] 3.730 3.780 5.250 5.424 5.345 2.200 1.615 1.835 2.465 3.520 3.435 3.840
## [25] 3.845 1.935 2.140 1.513 3.170 2.770 3.570 2.780
## [1] 16.46 17.02 18.61 19.44 17.02 20.22 15.84 20.00 22.90 18.30 18.90 17.40
## [13] 17.60 18.00 17.98 17.82 17.42 19.47 18.52 19.90 20.01 16.87 17.30 15.41
## [25] 17.05 18.90 16.70 16.90 14.50 15.50 14.60 18.60
## [1] 0 0 1 1 0 1 0 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 0 1
## [1] 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1
## [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5 4
## [1] 4 4 1 1 2 1 4 2 2 4 4 3 3 3 4 4 4 1 2 1 1 2 2 4 2 1 2 2 4 6 8 2
## mpg cyl disp hp drat wt qsec vs am gear carb
## [1,] 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## [2,] 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## [3,] 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## [4,] 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## [5,] 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## [6,] 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## [7,] 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## [8,] 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## [9,] 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## [10,] 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## [11,] 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## [12,] 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## [13,] 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## [14,] 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## [15,] 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## [16,] 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## [17,] 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## [18,] 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## [19,] 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## [20,] 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## [21,] 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## [22,] 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## [23,] 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## [24,] 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## [25,] 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## [26,] 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## [27,] 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## [28,] 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## [29,] 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## [30,] 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## [31,] 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## [32,] 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Find the means
sapply(mtcars, mean)
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Use sapply() to inspect for numeric values:
sapply(mtcars, is.numeric)
## mpg cyl disp hp drat wt qsec vs am gear carb
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Various examples below.
cols <- c("mpg","cyl","gear") # create vector of columns you want
# !! unquotes cols, so select() is given the column names stored in the vector
# NOTE(review): tidyselect's all_of(cols) is the documented helper for
# selecting from a character vector
mtcars %>%
select(!!cols)
## mpg cyl gear
## Mazda RX4 21.0 6 4
## Mazda RX4 Wag 21.0 6 4
## Datsun 710 22.8 4 4
## Hornet 4 Drive 21.4 6 3
## Hornet Sportabout 18.7 8 3
## Valiant 18.1 6 3
## Duster 360 14.3 8 3
## Merc 240D 24.4 4 4
## Merc 230 22.8 4 4
## Merc 280 19.2 6 4
## Merc 280C 17.8 6 4
## Merc 450SE 16.4 8 3
## Merc 450SL 17.3 8 3
## Merc 450SLC 15.2 8 3
## Cadillac Fleetwood 10.4 8 3
## Lincoln Continental 10.4 8 3
## Chrysler Imperial 14.7 8 3
## Fiat 128 32.4 4 4
## Honda Civic 30.4 4 4
## Toyota Corolla 33.9 4 4
## Toyota Corona 21.5 4 3
## Dodge Challenger 15.5 8 3
## AMC Javelin 15.2 8 3
## Camaro Z28 13.3 8 3
## Pontiac Firebird 19.2 8 3
## Fiat X1-9 27.3 4 4
## Porsche 914-2 26.0 4 5
## Lotus Europa 30.4 4 5
## Ford Pantera L 15.8 8 5
## Ferrari Dino 19.7 6 5
## Maserati Bora 15.0 8 5
## Volvo 142E 21.4 4 4
# or you can use "any_of", the current replacement for the superseded "one_of";
# names in cols that are not columns of the data are skipped
mtcars %>%
  select(any_of(cols))
## mpg cyl gear
## Mazda RX4 21.0 6 4
## Mazda RX4 Wag 21.0 6 4
## Datsun 710 22.8 4 4
## Hornet 4 Drive 21.4 6 3
## Hornet Sportabout 18.7 8 3
## Valiant 18.1 6 3
## Duster 360 14.3 8 3
## Merc 240D 24.4 4 4
## Merc 230 22.8 4 4
## Merc 280 19.2 6 4
## Merc 280C 17.8 6 4
## Merc 450SE 16.4 8 3
## Merc 450SL 17.3 8 3
## Merc 450SLC 15.2 8 3
## Cadillac Fleetwood 10.4 8 3
## Lincoln Continental 10.4 8 3
## Chrysler Imperial 14.7 8 3
## Fiat 128 32.4 4 4
## Honda Civic 30.4 4 4
## Toyota Corolla 33.9 4 4
## Toyota Corona 21.5 4 3
## Dodge Challenger 15.5 8 3
## AMC Javelin 15.2 8 3
## Camaro Z28 13.3 8 3
## Pontiac Firebird 19.2 8 3
## Fiat X1-9 27.3 4 4
## Porsche 914-2 26.0 4 5
## Lotus Europa 30.4 4 5
## Ford Pantera L 15.8 8 5
## Ferrari Dino 19.7 6 5
## Maserati Bora 15.0 8 5
## Volvo 142E 21.4 4 4
# credit: https://twitter.com/WeAreRLadies/status/1044528935470415877/photo/1
Order matters: setdiff(x, y) returns the elements of x that are not in y.
# setdiff(): elements of the first vector that are missing from the second
x <- c(1, 2, 3)
y <- c(2, 3, 4)
setdiff(x, y)
## [1] 1
x <- c(1, 2, 3)
y <- c(2, 3, 4)
setdiff(y, x)
## [1] 4
# setequal(): do two vectors hold the same set of values?
j <- c(1, 2, 3)
k <- c(1, 2, 3)
setequal(j, k)
## [1] TRUE
l <- c(1, 2, 3)
m <- c(1, 2, 4)
setequal(l, m)
## [1] FALSE
# signif(): round to a given number of significant digits
(x <- 1.263623413134134)
## [1] 1.263623
signif(x, digits = 2)
## [1] 1.3
signif(x, digits = 4)
## [1] 1.264
library(skimr)
# this version of skimr was installed using the following command on 8/Oct/19
# devtools::install_github("ropensci/skimr", ref = "v2")
# other versions threw an error when installing or running
# reference: https://github.com/ropensci/skimr
# skim_tee() prints a skim summary of the data it receives and the pipeline
# carries on with the data, so a summary is shown both before the filter
# (32 rows) and after it (14 rows)
mtcars %>%
skim_tee() %>%
filter(mpg > 20) %>%
skim_tee()
## ── Data Summary ────────────────────────
## Values
## Name data
## Number of rows 32
## Number of columns 11
## _______________________
## Column type frequency:
## numeric 11
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean sd p0 p25 p50
## 1 mpg 0 1 20.1 6.03 10.4 15.4 19.2
## 2 cyl 0 1 6.19 1.79 4 4 6
## 3 disp 0 1 231. 124. 71.1 121. 196.
## 4 hp 0 1 147. 68.6 52 96.5 123
## 5 drat 0 1 3.60 0.535 2.76 3.08 3.70
## 6 wt 0 1 3.22 0.978 1.51 2.58 3.32
## 7 qsec 0 1 17.8 1.79 14.5 16.9 17.7
## 8 vs 0 1 0.438 0.504 0 0 0
## 9 am 0 1 0.406 0.499 0 0 0
## 10 gear 0 1 3.69 0.738 3 3 4
## 11 carb 0 1 2.81 1.62 1 2 2
## p75 p100 hist
## 1 22.8 33.9 ▃▇▅▁▂
## 2 8 8 ▆▁▃▁▇
## 3 326 472 ▇▃▃▃▂
## 4 180 335 ▇▇▆▃▁
## 5 3.92 4.93 ▇▃▇▅▁
## 6 3.61 5.42 ▃▃▇▁▂
## 7 18.9 22.9 ▃▇▇▂▁
## 8 1 1 ▇▁▁▁▆
## 9 1 1 ▇▁▁▁▆
## 10 4 5 ▇▁▆▁▂
## 11 4 8 ▇▂▅▁▁
## ── Data Summary ────────────────────────
## Values
## Name data
## Number of rows 14
## Number of columns 11
## _______________________
## Column type frequency:
## numeric 11
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable n_missing complete_rate mean sd p0 p25 p50
## 1 mpg 0 1 25.5 4.60 21 21.4 23.6
## 2 cyl 0 1 4.43 0.852 4 4 4
## 3 disp 0 1 124. 49.4 71.1 83.0 120.
## 4 hp 0 1 88.5 21.7 52 66 94
## 5 drat 0 1 3.98 0.416 3.08 3.79 3.91
## 6 wt 0 1 2.42 0.577 1.51 1.99 2.39
## 7 qsec 0 1 18.8 1.72 16.5 17.4 18.8
## 8 vs 0 1 0.786 0.426 0 1 1
## 9 am 0 1 0.714 0.469 0 0.25 1
## 10 gear 0 1 4 0.555 3 4 4
## 11 carb 0 1 1.86 1.03 1 1 2
## p75 p100 hist
## 1 29.6 33.9 ▇▂▁▂▂
## 2 4 6 ▇▁▁▁▂
## 3 145. 258 ▇▅▃▁▁
## 4 110. 113 ▃▅▁▆▇
## 5 4.10 4.93 ▁▃▇▂▁
## 6 2.85 3.22 ▆▃▆▃▇
## 7 19.8 22.9 ▆▆▇▁▂
## 8 1 1 ▂▁▁▁▇
## 9 1 1 ▃▁▁▁▇
## 10 4 5 ▂▁▇▁▂
## 11 2 4 ▇▇▁▁▂
suppressPackageStartupMessages(library(dplyr))
# library(dplyr)
# five groups of shrinking size: A x 5, B x 4, C x 3, D x 2, E x 1
ID <- rep(c("A", "B", "C", "D", "E"), times = 5:1)
# course code = group letter followed by a running number within the group
Course <- paste0(ID, sequence(5:1))
Cost <- c(
  100, 200, 300, 400, 500,
  400, 300, 200, 200,
  200, 300, 300,
  123, 234,
  111
)
(DF_1 <- data.frame(ID = ID, Course = Course, Cost = Cost))
## ID Course Cost
## 1 A A1 100
## 2 A A2 200
## 3 A A3 300
## 4 A A4 400
## 5 A A5 500
## 6 B B1 400
## 7 B B2 300
## 8 B B3 200
## 9 B B4 200
## 10 C C1 200
## 11 C C2 300
## 12 C C3 300
## 13 D D1 123
## 14 D D2 234
## 15 E E1 111
DF_1 %>% group_by(ID) %>% slice(1) # first row for each group
## # A tibble: 5 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A1 100
## 2 B B1 400
## 3 C C1 200
## 4 D D1 123
## 5 E E1 111
DF_1 %>% group_by(ID) %>% slice(2) # second row
## # A tibble: 4 × 3
## # Groups: ID [4]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A2 200
## 2 B B2 300
## 3 C C2 300
## 4 D D2 234
DF_1 %>% group_by(ID) %>% slice(5) # fifth row, if any
## # A tibble: 1 × 3
## # Groups: ID [1]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A5 500
DF_1 %>% group_by(ID) %>% slice(n()) # last row
## # A tibble: 5 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A5 500
## 2 B B4 200
## 3 C C3 300
## 4 D D2 234
## 5 E E1 111
DF_1 %>% group_by(ID) %>% slice(n()-1) # last row but one
## # A tibble: 4 × 3
## # Groups: ID [4]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A4 400
## 2 B B3 200
## 3 C C2 300
## 4 D D1 123
DF_1 %>% group_by(ID) %>% arrange(Cost) %>% slice(1) # sort by Cost, giving cheapest course in each ID group
## # A tibble: 5 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A1 100
## 2 B B3 200
## 3 C C1 200
## 4 D D1 123
## 5 E E1 111
# Get first and last row for each group, three versions
DF_1 %>% group_by(ID) %>% filter(row_number()==1 | row_number()==n())
## # A tibble: 9 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A1 100
## 2 A A5 500
## 3 B B1 400
## 4 B B4 200
## 5 C C1 200
## 6 C C3 300
## 7 D D1 123
## 8 D D2 234
## 9 E E1 111
DF_1 %>% group_by(ID) %>% filter(row_number() %in% c(1, n()))
## # A tibble: 9 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A1 100
## 2 A A5 500
## 3 B B1 400
## 4 B B4 200
## 5 C C1 200
## 6 C C3 300
## 7 D D1 123
## 8 D D2 234
## 9 E E1 111
DF_1 %>% group_by(ID) %>% slice(c(1, n())) # NB repetition if only one in group
## # A tibble: 10 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A1 100
## 2 A A5 500
## 3 B B1 400
## 4 B B4 200
## 5 C C1 200
## 6 C C3 300
## 7 D D1 123
## 8 D D2 234
## 9 E E1 111
## 10 E E1 111
# First, third and last
DF_1 %>% group_by(ID) %>% slice(c(1, 3, n())) # NB the repetition!!!
## # A tibble: 13 × 3
## # Groups: ID [5]
## ID Course Cost
## <chr> <chr> <dbl>
## 1 A A1 100
## 2 A A3 300
## 3 A A5 500
## 4 B B1 400
## 5 B B3 200
## 6 B B4 200
## 7 C C1 200
## 8 C C3 300
## 9 C C3 300
## 10 D D1 123
## 11 D D2 234
## 12 E E1 111
## 13 E E1 111
slice(mtcars,1L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21 6 160 110 3.9 2.62 16.46 0 1 4 4
slice(mtcars,n())
## mpg cyl disp hp drat wt qsec vs am gear carb
## Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2
slice(mtcars,5:n())
## mpg cyl disp hp drat wt qsec vs am gear carb
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
slice(mtcars,1:5)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
a <- c(1, 2, 3)
str(a) # display the internal *str*ucture of an R object
## num [1:3] 1 2 3
# summary() gives a “summary” of a, usually a statistical summary, but it is
# generic, meaning it has different operations for different classes of a
summary(a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 1.5 2.0 2.0 2.5 3.0
mtcars %>%
tibble::rownames_to_column(var="car_name") %>%
select(car_name, mpg) %>%
filter(str_detect(tolower(car_name), pattern = "merc"))
## car_name mpg
## 1 Merc 240D 24.4
## 2 Merc 230 22.8
## 3 Merc 280 19.2
## 4 Merc 280C 17.8
## 5 Merc 450SE 16.4
## 6 Merc 450SL 17.3
## 7 Merc 450SLC 15.2
Sequences.
Sequences of numbers
seq(from = 1, to = 20, by = 1) # generate a sequence from 1 to 20 in steps of 1
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
seq(from = 1, to = 20, by = 2) # generate a sequence from 1 to 20 in steps of 2
## [1] 1 3 5 7 9 11 13 15 17 19
seq(from = 20, to = 1, by = -2) # generate a sequence from 20 to 1 in steps of -2
## [1] 20 18 16 14 12 10 8 6 4 2
seq(from = 1, to = 2, length.out = 4) # decide the length of the output
## [1] 1.000000 1.333333 1.666667 2.000000
Sequences of Dates.
## first days of years
seq(as.Date("1910/1/1"), as.Date("1999/1/1"), "years")
## [1] "1910-01-01" "1911-01-01" "1912-01-01" "1913-01-01" "1914-01-01"
## [6] "1915-01-01" "1916-01-01" "1917-01-01" "1918-01-01" "1919-01-01"
## [11] "1920-01-01" "1921-01-01" "1922-01-01" "1923-01-01" "1924-01-01"
## [16] "1925-01-01" "1926-01-01" "1927-01-01" "1928-01-01" "1929-01-01"
## [21] "1930-01-01" "1931-01-01" "1932-01-01" "1933-01-01" "1934-01-01"
## [26] "1935-01-01" "1936-01-01" "1937-01-01" "1938-01-01" "1939-01-01"
## [31] "1940-01-01" "1941-01-01" "1942-01-01" "1943-01-01" "1944-01-01"
## [36] "1945-01-01" "1946-01-01" "1947-01-01" "1948-01-01" "1949-01-01"
## [41] "1950-01-01" "1951-01-01" "1952-01-01" "1953-01-01" "1954-01-01"
## [46] "1955-01-01" "1956-01-01" "1957-01-01" "1958-01-01" "1959-01-01"
## [51] "1960-01-01" "1961-01-01" "1962-01-01" "1963-01-01" "1964-01-01"
## [56] "1965-01-01" "1966-01-01" "1967-01-01" "1968-01-01" "1969-01-01"
## [61] "1970-01-01" "1971-01-01" "1972-01-01" "1973-01-01" "1974-01-01"
## [66] "1975-01-01" "1976-01-01" "1977-01-01" "1978-01-01" "1979-01-01"
## [71] "1980-01-01" "1981-01-01" "1982-01-01" "1983-01-01" "1984-01-01"
## [76] "1985-01-01" "1986-01-01" "1987-01-01" "1988-01-01" "1989-01-01"
## [81] "1990-01-01" "1991-01-01" "1992-01-01" "1993-01-01" "1994-01-01"
## [86] "1995-01-01" "1996-01-01" "1997-01-01" "1998-01-01" "1999-01-01"
## by month
seq(as.Date("2000/1/1"), by = "month", length.out = 24)
## [1] "2000-01-01" "2000-02-01" "2000-03-01" "2000-04-01" "2000-05-01"
## [6] "2000-06-01" "2000-07-01" "2000-08-01" "2000-09-01" "2000-10-01"
## [11] "2000-11-01" "2000-12-01" "2001-01-01" "2001-02-01" "2001-03-01"
## [16] "2001-04-01" "2001-05-01" "2001-06-01" "2001-07-01" "2001-08-01"
## [21] "2001-09-01" "2001-10-01" "2001-11-01" "2001-12-01"
## quarters
seq(as.Date("2000/1/1"), as.Date("2003/1/1"), by = "quarter")
## [1] "2000-01-01" "2000-04-01" "2000-07-01" "2000-10-01" "2001-01-01"
## [6] "2001-04-01" "2001-07-01" "2001-10-01" "2002-01-01" "2002-04-01"
## [11] "2002-07-01" "2002-10-01" "2003-01-01"
## weeks
seq(as.Date("2000/1/1"), as.Date("2003/1/1"), by = "weeks")
## [1] "2000-01-01" "2000-01-08" "2000-01-15" "2000-01-22" "2000-01-29"
## [6] "2000-02-05" "2000-02-12" "2000-02-19" "2000-02-26" "2000-03-04"
## [11] "2000-03-11" "2000-03-18" "2000-03-25" "2000-04-01" "2000-04-08"
## [16] "2000-04-15" "2000-04-22" "2000-04-29" "2000-05-06" "2000-05-13"
## [21] "2000-05-20" "2000-05-27" "2000-06-03" "2000-06-10" "2000-06-17"
## [26] "2000-06-24" "2000-07-01" "2000-07-08" "2000-07-15" "2000-07-22"
## [31] "2000-07-29" "2000-08-05" "2000-08-12" "2000-08-19" "2000-08-26"
## [36] "2000-09-02" "2000-09-09" "2000-09-16" "2000-09-23" "2000-09-30"
## [41] "2000-10-07" "2000-10-14" "2000-10-21" "2000-10-28" "2000-11-04"
## [46] "2000-11-11" "2000-11-18" "2000-11-25" "2000-12-02" "2000-12-09"
## [51] "2000-12-16" "2000-12-23" "2000-12-30" "2001-01-06" "2001-01-13"
## [56] "2001-01-20" "2001-01-27" "2001-02-03" "2001-02-10" "2001-02-17"
## [61] "2001-02-24" "2001-03-03" "2001-03-10" "2001-03-17" "2001-03-24"
## [66] "2001-03-31" "2001-04-07" "2001-04-14" "2001-04-21" "2001-04-28"
## [71] "2001-05-05" "2001-05-12" "2001-05-19" "2001-05-26" "2001-06-02"
## [76] "2001-06-09" "2001-06-16" "2001-06-23" "2001-06-30" "2001-07-07"
## [81] "2001-07-14" "2001-07-21" "2001-07-28" "2001-08-04" "2001-08-11"
## [86] "2001-08-18" "2001-08-25" "2001-09-01" "2001-09-08" "2001-09-15"
## [91] "2001-09-22" "2001-09-29" "2001-10-06" "2001-10-13" "2001-10-20"
## [96] "2001-10-27" "2001-11-03" "2001-11-10" "2001-11-17" "2001-11-24"
## [101] "2001-12-01" "2001-12-08" "2001-12-15" "2001-12-22" "2001-12-29"
## [106] "2002-01-05" "2002-01-12" "2002-01-19" "2002-01-26" "2002-02-02"
## [111] "2002-02-09" "2002-02-16" "2002-02-23" "2002-03-02" "2002-03-09"
## [116] "2002-03-16" "2002-03-23" "2002-03-30" "2002-04-06" "2002-04-13"
## [121] "2002-04-20" "2002-04-27" "2002-05-04" "2002-05-11" "2002-05-18"
## [126] "2002-05-25" "2002-06-01" "2002-06-08" "2002-06-15" "2002-06-22"
## [131] "2002-06-29" "2002-07-06" "2002-07-13" "2002-07-20" "2002-07-27"
## [136] "2002-08-03" "2002-08-10" "2002-08-17" "2002-08-24" "2002-08-31"
## [141] "2002-09-07" "2002-09-14" "2002-09-21" "2002-09-28" "2002-10-05"
## [146] "2002-10-12" "2002-10-19" "2002-10-26" "2002-11-02" "2002-11-09"
## [151] "2002-11-16" "2002-11-23" "2002-11-30" "2002-12-07" "2002-12-14"
## [156] "2002-12-21" "2002-12-28"
## days: every calendar day from 2000-01-01 through 2003-01-01
seq(from = as.Date("2000/1/1"), to = as.Date("2003/1/1"), by = "days")
## [1] "2000-01-01" "2000-01-02" "2000-01-03" "2000-01-04" "2000-01-05"
## [6] "2000-01-06" "2000-01-07" "2000-01-08" "2000-01-09" "2000-01-10"
## [11] "2000-01-11" "2000-01-12" "2000-01-13" "2000-01-14" "2000-01-15"
## [16] "2000-01-16" "2000-01-17" "2000-01-18" "2000-01-19" "2000-01-20"
## [21] "2000-01-21" "2000-01-22" "2000-01-23" "2000-01-24" "2000-01-25"
## [26] "2000-01-26" "2000-01-27" "2000-01-28" "2000-01-29" "2000-01-30"
## [31] "2000-01-31" "2000-02-01" "2000-02-02" "2000-02-03" "2000-02-04"
## [36] "2000-02-05" "2000-02-06" "2000-02-07" "2000-02-08" "2000-02-09"
## [41] "2000-02-10" "2000-02-11" "2000-02-12" "2000-02-13" "2000-02-14"
## [46] "2000-02-15" "2000-02-16" "2000-02-17" "2000-02-18" "2000-02-19"
## [51] "2000-02-20" "2000-02-21" "2000-02-22" "2000-02-23" "2000-02-24"
## [56] "2000-02-25" "2000-02-26" "2000-02-27" "2000-02-28" "2000-02-29"
## [61] "2000-03-01" "2000-03-02" "2000-03-03" "2000-03-04" "2000-03-05"
## [66] "2000-03-06" "2000-03-07" "2000-03-08" "2000-03-09" "2000-03-10"
## [71] "2000-03-11" "2000-03-12" "2000-03-13" "2000-03-14" "2000-03-15"
## [76] "2000-03-16" "2000-03-17" "2000-03-18" "2000-03-19" "2000-03-20"
## [81] "2000-03-21" "2000-03-22" "2000-03-23" "2000-03-24" "2000-03-25"
## [86] "2000-03-26" "2000-03-27" "2000-03-28" "2000-03-29" "2000-03-30"
## [91] "2000-03-31" "2000-04-01" "2000-04-02" "2000-04-03" "2000-04-04"
## [96] "2000-04-05" "2000-04-06" "2000-04-07" "2000-04-08" "2000-04-09"
## [101] "2000-04-10" "2000-04-11" "2000-04-12" "2000-04-13" "2000-04-14"
## [106] "2000-04-15" "2000-04-16" "2000-04-17" "2000-04-18" "2000-04-19"
## [111] "2000-04-20" "2000-04-21" "2000-04-22" "2000-04-23" "2000-04-24"
## [116] "2000-04-25" "2000-04-26" "2000-04-27" "2000-04-28" "2000-04-29"
## [121] "2000-04-30" "2000-05-01" "2000-05-02" "2000-05-03" "2000-05-04"
## [126] "2000-05-05" "2000-05-06" "2000-05-07" "2000-05-08" "2000-05-09"
## [131] "2000-05-10" "2000-05-11" "2000-05-12" "2000-05-13" "2000-05-14"
## [136] "2000-05-15" "2000-05-16" "2000-05-17" "2000-05-18" "2000-05-19"
## [141] "2000-05-20" "2000-05-21" "2000-05-22" "2000-05-23" "2000-05-24"
## [146] "2000-05-25" "2000-05-26" "2000-05-27" "2000-05-28" "2000-05-29"
## [151] "2000-05-30" "2000-05-31" "2000-06-01" "2000-06-02" "2000-06-03"
## [156] "2000-06-04" "2000-06-05" "2000-06-06" "2000-06-07" "2000-06-08"
## [161] "2000-06-09" "2000-06-10" "2000-06-11" "2000-06-12" "2000-06-13"
## [166] "2000-06-14" "2000-06-15" "2000-06-16" "2000-06-17" "2000-06-18"
## [171] "2000-06-19" "2000-06-20" "2000-06-21" "2000-06-22" "2000-06-23"
## [176] "2000-06-24" "2000-06-25" "2000-06-26" "2000-06-27" "2000-06-28"
## [181] "2000-06-29" "2000-06-30" "2000-07-01" "2000-07-02" "2000-07-03"
## [186] "2000-07-04" "2000-07-05" "2000-07-06" "2000-07-07" "2000-07-08"
## [191] "2000-07-09" "2000-07-10" "2000-07-11" "2000-07-12" "2000-07-13"
## [196] "2000-07-14" "2000-07-15" "2000-07-16" "2000-07-17" "2000-07-18"
## [201] "2000-07-19" "2000-07-20" "2000-07-21" "2000-07-22" "2000-07-23"
## [206] "2000-07-24" "2000-07-25" "2000-07-26" "2000-07-27" "2000-07-28"
## [211] "2000-07-29" "2000-07-30" "2000-07-31" "2000-08-01" "2000-08-02"
## [216] "2000-08-03" "2000-08-04" "2000-08-05" "2000-08-06" "2000-08-07"
## [221] "2000-08-08" "2000-08-09" "2000-08-10" "2000-08-11" "2000-08-12"
## [226] "2000-08-13" "2000-08-14" "2000-08-15" "2000-08-16" "2000-08-17"
## [231] "2000-08-18" "2000-08-19" "2000-08-20" "2000-08-21" "2000-08-22"
## [236] "2000-08-23" "2000-08-24" "2000-08-25" "2000-08-26" "2000-08-27"
## [241] "2000-08-28" "2000-08-29" "2000-08-30" "2000-08-31" "2000-09-01"
## [246] "2000-09-02" "2000-09-03" "2000-09-04" "2000-09-05" "2000-09-06"
## [251] "2000-09-07" "2000-09-08" "2000-09-09" "2000-09-10" "2000-09-11"
## [256] "2000-09-12" "2000-09-13" "2000-09-14" "2000-09-15" "2000-09-16"
## [261] "2000-09-17" "2000-09-18" "2000-09-19" "2000-09-20" "2000-09-21"
## [266] "2000-09-22" "2000-09-23" "2000-09-24" "2000-09-25" "2000-09-26"
## [271] "2000-09-27" "2000-09-28" "2000-09-29" "2000-09-30" "2000-10-01"
## [276] "2000-10-02" "2000-10-03" "2000-10-04" "2000-10-05" "2000-10-06"
## [281] "2000-10-07" "2000-10-08" "2000-10-09" "2000-10-10" "2000-10-11"
## [286] "2000-10-12" "2000-10-13" "2000-10-14" "2000-10-15" "2000-10-16"
## [291] "2000-10-17" "2000-10-18" "2000-10-19" "2000-10-20" "2000-10-21"
## [296] "2000-10-22" "2000-10-23" "2000-10-24" "2000-10-25" "2000-10-26"
## [301] "2000-10-27" "2000-10-28" "2000-10-29" "2000-10-30" "2000-10-31"
## [306] "2000-11-01" "2000-11-02" "2000-11-03" "2000-11-04" "2000-11-05"
## [311] "2000-11-06" "2000-11-07" "2000-11-08" "2000-11-09" "2000-11-10"
## [316] "2000-11-11" "2000-11-12" "2000-11-13" "2000-11-14" "2000-11-15"
## [321] "2000-11-16" "2000-11-17" "2000-11-18" "2000-11-19" "2000-11-20"
## [326] "2000-11-21" "2000-11-22" "2000-11-23" "2000-11-24" "2000-11-25"
## [331] "2000-11-26" "2000-11-27" "2000-11-28" "2000-11-29" "2000-11-30"
## [336] "2000-12-01" "2000-12-02" "2000-12-03" "2000-12-04" "2000-12-05"
## [341] "2000-12-06" "2000-12-07" "2000-12-08" "2000-12-09" "2000-12-10"
## [346] "2000-12-11" "2000-12-12" "2000-12-13" "2000-12-14" "2000-12-15"
## [351] "2000-12-16" "2000-12-17" "2000-12-18" "2000-12-19" "2000-12-20"
## [356] "2000-12-21" "2000-12-22" "2000-12-23" "2000-12-24" "2000-12-25"
## [361] "2000-12-26" "2000-12-27" "2000-12-28" "2000-12-29" "2000-12-30"
## [366] "2000-12-31" "2001-01-01" "2001-01-02" "2001-01-03" "2001-01-04"
## [371] "2001-01-05" "2001-01-06" "2001-01-07" "2001-01-08" "2001-01-09"
## [376] "2001-01-10" "2001-01-11" "2001-01-12" "2001-01-13" "2001-01-14"
## [381] "2001-01-15" "2001-01-16" "2001-01-17" "2001-01-18" "2001-01-19"
## [386] "2001-01-20" "2001-01-21" "2001-01-22" "2001-01-23" "2001-01-24"
## [391] "2001-01-25" "2001-01-26" "2001-01-27" "2001-01-28" "2001-01-29"
## [396] "2001-01-30" "2001-01-31" "2001-02-01" "2001-02-02" "2001-02-03"
## [401] "2001-02-04" "2001-02-05" "2001-02-06" "2001-02-07" "2001-02-08"
## [406] "2001-02-09" "2001-02-10" "2001-02-11" "2001-02-12" "2001-02-13"
## [411] "2001-02-14" "2001-02-15" "2001-02-16" "2001-02-17" "2001-02-18"
## [416] "2001-02-19" "2001-02-20" "2001-02-21" "2001-02-22" "2001-02-23"
## [421] "2001-02-24" "2001-02-25" "2001-02-26" "2001-02-27" "2001-02-28"
## [426] "2001-03-01" "2001-03-02" "2001-03-03" "2001-03-04" "2001-03-05"
## [431] "2001-03-06" "2001-03-07" "2001-03-08" "2001-03-09" "2001-03-10"
## [436] "2001-03-11" "2001-03-12" "2001-03-13" "2001-03-14" "2001-03-15"
## [441] "2001-03-16" "2001-03-17" "2001-03-18" "2001-03-19" "2001-03-20"
## [446] "2001-03-21" "2001-03-22" "2001-03-23" "2001-03-24" "2001-03-25"
## [451] "2001-03-26" "2001-03-27" "2001-03-28" "2001-03-29" "2001-03-30"
## [456] "2001-03-31" "2001-04-01" "2001-04-02" "2001-04-03" "2001-04-04"
## [461] "2001-04-05" "2001-04-06" "2001-04-07" "2001-04-08" "2001-04-09"
## [466] "2001-04-10" "2001-04-11" "2001-04-12" "2001-04-13" "2001-04-14"
## [471] "2001-04-15" "2001-04-16" "2001-04-17" "2001-04-18" "2001-04-19"
## [476] "2001-04-20" "2001-04-21" "2001-04-22" "2001-04-23" "2001-04-24"
## [481] "2001-04-25" "2001-04-26" "2001-04-27" "2001-04-28" "2001-04-29"
## [486] "2001-04-30" "2001-05-01" "2001-05-02" "2001-05-03" "2001-05-04"
## [491] "2001-05-05" "2001-05-06" "2001-05-07" "2001-05-08" "2001-05-09"
## [496] "2001-05-10" "2001-05-11" "2001-05-12" "2001-05-13" "2001-05-14"
## [501] "2001-05-15" "2001-05-16" "2001-05-17" "2001-05-18" "2001-05-19"
## [506] "2001-05-20" "2001-05-21" "2001-05-22" "2001-05-23" "2001-05-24"
## [511] "2001-05-25" "2001-05-26" "2001-05-27" "2001-05-28" "2001-05-29"
## [516] "2001-05-30" "2001-05-31" "2001-06-01" "2001-06-02" "2001-06-03"
## [521] "2001-06-04" "2001-06-05" "2001-06-06" "2001-06-07" "2001-06-08"
## [526] "2001-06-09" "2001-06-10" "2001-06-11" "2001-06-12" "2001-06-13"
## [531] "2001-06-14" "2001-06-15" "2001-06-16" "2001-06-17" "2001-06-18"
## [536] "2001-06-19" "2001-06-20" "2001-06-21" "2001-06-22" "2001-06-23"
## [541] "2001-06-24" "2001-06-25" "2001-06-26" "2001-06-27" "2001-06-28"
## [546] "2001-06-29" "2001-06-30" "2001-07-01" "2001-07-02" "2001-07-03"
## [551] "2001-07-04" "2001-07-05" "2001-07-06" "2001-07-07" "2001-07-08"
## [556] "2001-07-09" "2001-07-10" "2001-07-11" "2001-07-12" "2001-07-13"
## [561] "2001-07-14" "2001-07-15" "2001-07-16" "2001-07-17" "2001-07-18"
## [566] "2001-07-19" "2001-07-20" "2001-07-21" "2001-07-22" "2001-07-23"
## [571] "2001-07-24" "2001-07-25" "2001-07-26" "2001-07-27" "2001-07-28"
## [576] "2001-07-29" "2001-07-30" "2001-07-31" "2001-08-01" "2001-08-02"
## [581] "2001-08-03" "2001-08-04" "2001-08-05" "2001-08-06" "2001-08-07"
## [586] "2001-08-08" "2001-08-09" "2001-08-10" "2001-08-11" "2001-08-12"
## [591] "2001-08-13" "2001-08-14" "2001-08-15" "2001-08-16" "2001-08-17"
## [596] "2001-08-18" "2001-08-19" "2001-08-20" "2001-08-21" "2001-08-22"
## [601] "2001-08-23" "2001-08-24" "2001-08-25" "2001-08-26" "2001-08-27"
## [606] "2001-08-28" "2001-08-29" "2001-08-30" "2001-08-31" "2001-09-01"
## [611] "2001-09-02" "2001-09-03" "2001-09-04" "2001-09-05" "2001-09-06"
## [616] "2001-09-07" "2001-09-08" "2001-09-09" "2001-09-10" "2001-09-11"
## [621] "2001-09-12" "2001-09-13" "2001-09-14" "2001-09-15" "2001-09-16"
## [626] "2001-09-17" "2001-09-18" "2001-09-19" "2001-09-20" "2001-09-21"
## [631] "2001-09-22" "2001-09-23" "2001-09-24" "2001-09-25" "2001-09-26"
## [636] "2001-09-27" "2001-09-28" "2001-09-29" "2001-09-30" "2001-10-01"
## [641] "2001-10-02" "2001-10-03" "2001-10-04" "2001-10-05" "2001-10-06"
## [646] "2001-10-07" "2001-10-08" "2001-10-09" "2001-10-10" "2001-10-11"
## [651] "2001-10-12" "2001-10-13" "2001-10-14" "2001-10-15" "2001-10-16"
## [656] "2001-10-17" "2001-10-18" "2001-10-19" "2001-10-20" "2001-10-21"
## [661] "2001-10-22" "2001-10-23" "2001-10-24" "2001-10-25" "2001-10-26"
## [666] "2001-10-27" "2001-10-28" "2001-10-29" "2001-10-30" "2001-10-31"
## [671] "2001-11-01" "2001-11-02" "2001-11-03" "2001-11-04" "2001-11-05"
## [676] "2001-11-06" "2001-11-07" "2001-11-08" "2001-11-09" "2001-11-10"
## [681] "2001-11-11" "2001-11-12" "2001-11-13" "2001-11-14" "2001-11-15"
## [686] "2001-11-16" "2001-11-17" "2001-11-18" "2001-11-19" "2001-11-20"
## [691] "2001-11-21" "2001-11-22" "2001-11-23" "2001-11-24" "2001-11-25"
## [696] "2001-11-26" "2001-11-27" "2001-11-28" "2001-11-29" "2001-11-30"
## [701] "2001-12-01" "2001-12-02" "2001-12-03" "2001-12-04" "2001-12-05"
## [706] "2001-12-06" "2001-12-07" "2001-12-08" "2001-12-09" "2001-12-10"
## [711] "2001-12-11" "2001-12-12" "2001-12-13" "2001-12-14" "2001-12-15"
## [716] "2001-12-16" "2001-12-17" "2001-12-18" "2001-12-19" "2001-12-20"
## [721] "2001-12-21" "2001-12-22" "2001-12-23" "2001-12-24" "2001-12-25"
## [726] "2001-12-26" "2001-12-27" "2001-12-28" "2001-12-29" "2001-12-30"
## [731] "2001-12-31" "2002-01-01" "2002-01-02" "2002-01-03" "2002-01-04"
## [736] "2002-01-05" "2002-01-06" "2002-01-07" "2002-01-08" "2002-01-09"
## [741] "2002-01-10" "2002-01-11" "2002-01-12" "2002-01-13" "2002-01-14"
## [746] "2002-01-15" "2002-01-16" "2002-01-17" "2002-01-18" "2002-01-19"
## [751] "2002-01-20" "2002-01-21" "2002-01-22" "2002-01-23" "2002-01-24"
## [756] "2002-01-25" "2002-01-26" "2002-01-27" "2002-01-28" "2002-01-29"
## [761] "2002-01-30" "2002-01-31" "2002-02-01" "2002-02-02" "2002-02-03"
## [766] "2002-02-04" "2002-02-05" "2002-02-06" "2002-02-07" "2002-02-08"
## [771] "2002-02-09" "2002-02-10" "2002-02-11" "2002-02-12" "2002-02-13"
## [776] "2002-02-14" "2002-02-15" "2002-02-16" "2002-02-17" "2002-02-18"
## [781] "2002-02-19" "2002-02-20" "2002-02-21" "2002-02-22" "2002-02-23"
## [786] "2002-02-24" "2002-02-25" "2002-02-26" "2002-02-27" "2002-02-28"
## [791] "2002-03-01" "2002-03-02" "2002-03-03" "2002-03-04" "2002-03-05"
## [796] "2002-03-06" "2002-03-07" "2002-03-08" "2002-03-09" "2002-03-10"
## [801] "2002-03-11" "2002-03-12" "2002-03-13" "2002-03-14" "2002-03-15"
## [806] "2002-03-16" "2002-03-17" "2002-03-18" "2002-03-19" "2002-03-20"
## [811] "2002-03-21" "2002-03-22" "2002-03-23" "2002-03-24" "2002-03-25"
## [816] "2002-03-26" "2002-03-27" "2002-03-28" "2002-03-29" "2002-03-30"
## [821] "2002-03-31" "2002-04-01" "2002-04-02" "2002-04-03" "2002-04-04"
## [826] "2002-04-05" "2002-04-06" "2002-04-07" "2002-04-08" "2002-04-09"
## [831] "2002-04-10" "2002-04-11" "2002-04-12" "2002-04-13" "2002-04-14"
## [836] "2002-04-15" "2002-04-16" "2002-04-17" "2002-04-18" "2002-04-19"
## [841] "2002-04-20" "2002-04-21" "2002-04-22" "2002-04-23" "2002-04-24"
## [846] "2002-04-25" "2002-04-26" "2002-04-27" "2002-04-28" "2002-04-29"
## [851] "2002-04-30" "2002-05-01" "2002-05-02" "2002-05-03" "2002-05-04"
## [856] "2002-05-05" "2002-05-06" "2002-05-07" "2002-05-08" "2002-05-09"
## [861] "2002-05-10" "2002-05-11" "2002-05-12" "2002-05-13" "2002-05-14"
## [866] "2002-05-15" "2002-05-16" "2002-05-17" "2002-05-18" "2002-05-19"
## [871] "2002-05-20" "2002-05-21" "2002-05-22" "2002-05-23" "2002-05-24"
## [876] "2002-05-25" "2002-05-26" "2002-05-27" "2002-05-28" "2002-05-29"
## [881] "2002-05-30" "2002-05-31" "2002-06-01" "2002-06-02" "2002-06-03"
## [886] "2002-06-04" "2002-06-05" "2002-06-06" "2002-06-07" "2002-06-08"
## [891] "2002-06-09" "2002-06-10" "2002-06-11" "2002-06-12" "2002-06-13"
## [896] "2002-06-14" "2002-06-15" "2002-06-16" "2002-06-17" "2002-06-18"
## [901] "2002-06-19" "2002-06-20" "2002-06-21" "2002-06-22" "2002-06-23"
## [906] "2002-06-24" "2002-06-25" "2002-06-26" "2002-06-27" "2002-06-28"
## [911] "2002-06-29" "2002-06-30" "2002-07-01" "2002-07-02" "2002-07-03"
## [916] "2002-07-04" "2002-07-05" "2002-07-06" "2002-07-07" "2002-07-08"
## [921] "2002-07-09" "2002-07-10" "2002-07-11" "2002-07-12" "2002-07-13"
## [926] "2002-07-14" "2002-07-15" "2002-07-16" "2002-07-17" "2002-07-18"
## [931] "2002-07-19" "2002-07-20" "2002-07-21" "2002-07-22" "2002-07-23"
## [936] "2002-07-24" "2002-07-25" "2002-07-26" "2002-07-27" "2002-07-28"
## [941] "2002-07-29" "2002-07-30" "2002-07-31" "2002-08-01" "2002-08-02"
## [946] "2002-08-03" "2002-08-04" "2002-08-05" "2002-08-06" "2002-08-07"
## [951] "2002-08-08" "2002-08-09" "2002-08-10" "2002-08-11" "2002-08-12"
## [956] "2002-08-13" "2002-08-14" "2002-08-15" "2002-08-16" "2002-08-17"
## [961] "2002-08-18" "2002-08-19" "2002-08-20" "2002-08-21" "2002-08-22"
## [966] "2002-08-23" "2002-08-24" "2002-08-25" "2002-08-26" "2002-08-27"
## [971] "2002-08-28" "2002-08-29" "2002-08-30" "2002-08-31" "2002-09-01"
## [976] "2002-09-02" "2002-09-03" "2002-09-04" "2002-09-05" "2002-09-06"
## [981] "2002-09-07" "2002-09-08" "2002-09-09" "2002-09-10" "2002-09-11"
## [986] "2002-09-12" "2002-09-13" "2002-09-14" "2002-09-15" "2002-09-16"
## [991] "2002-09-17" "2002-09-18" "2002-09-19" "2002-09-20" "2002-09-21"
## [996] "2002-09-22" "2002-09-23" "2002-09-24" "2002-09-25" "2002-09-26"
## [1001] "2002-09-27" "2002-09-28" "2002-09-29" "2002-09-30" "2002-10-01"
## [1006] "2002-10-02" "2002-10-03" "2002-10-04" "2002-10-05" "2002-10-06"
## [1011] "2002-10-07" "2002-10-08" "2002-10-09" "2002-10-10" "2002-10-11"
## [1016] "2002-10-12" "2002-10-13" "2002-10-14" "2002-10-15" "2002-10-16"
## [1021] "2002-10-17" "2002-10-18" "2002-10-19" "2002-10-20" "2002-10-21"
## [1026] "2002-10-22" "2002-10-23" "2002-10-24" "2002-10-25" "2002-10-26"
## [1031] "2002-10-27" "2002-10-28" "2002-10-29" "2002-10-30" "2002-10-31"
## [1036] "2002-11-01" "2002-11-02" "2002-11-03" "2002-11-04" "2002-11-05"
## [1041] "2002-11-06" "2002-11-07" "2002-11-08" "2002-11-09" "2002-11-10"
## [1046] "2002-11-11" "2002-11-12" "2002-11-13" "2002-11-14" "2002-11-15"
## [1051] "2002-11-16" "2002-11-17" "2002-11-18" "2002-11-19" "2002-11-20"
## [1056] "2002-11-21" "2002-11-22" "2002-11-23" "2002-11-24" "2002-11-25"
## [1061] "2002-11-26" "2002-11-27" "2002-11-28" "2002-11-29" "2002-11-30"
## [1066] "2002-12-01" "2002-12-02" "2002-12-03" "2002-12-04" "2002-12-05"
## [1071] "2002-12-06" "2002-12-07" "2002-12-08" "2002-12-09" "2002-12-10"
## [1076] "2002-12-11" "2002-12-12" "2002-12-13" "2002-12-14" "2002-12-15"
## [1081] "2002-12-16" "2002-12-17" "2002-12-18" "2002-12-19" "2002-12-20"
## [1086] "2002-12-21" "2002-12-22" "2002-12-23" "2002-12-24" "2002-12-25"
## [1091] "2002-12-26" "2002-12-27" "2002-12-28" "2002-12-29" "2002-12-30"
## [1096] "2002-12-31" "2003-01-01"
Find every 7th of the month between two dates, where the end date is itself a 7th. The final filter excludes both endpoints.
st <- as.Date("1998-12-17")
en <- as.Date("2000-1-7")
# Step backwards from the end date one month at a time, so each element is a 7th.
ll <- seq(from = en, to = st, by = "-1 month")
ll
## [1] "2000-01-07" "1999-12-07" "1999-11-07" "1999-10-07" "1999-09-07"
## [6] "1999-08-07" "1999-07-07" "1999-06-07" "1999-05-07" "1999-04-07"
## [11] "1999-03-07" "1999-02-07" "1999-01-07"
# Keep only the dates strictly between the endpoints, in chronological order.
rev(ll[st < ll & ll < en])
## [1] "1999-01-07" "1999-02-07" "1999-03-07" "1999-04-07" "1999-05-07"
## [6] "1999-06-07" "1999-07-07" "1999-08-07" "1999-09-07" "1999-10-07"
## [11] "1999-11-07" "1999-12-07"
# Long-format example data: one (name, variable, value) triple per row.
DF1 <- read.table(text = "name variable value
Amy Age 21
Bill Age 32
Cathy Age 41
Amy Salary 21000
Bill Salary 32000
Cathy Salary 41000
Amy Sex F
Bill Sex M
Cathy Sex F", header = TRUE, check.names = FALSE)
DF1
## name variable value
## 1 Amy Age 21
## 2 Bill Age 32
## 3 Cathy Age 41
## 4 Amy Salary 21000
## 5 Bill Salary 32000
## 6 Cathy Salary 41000
## 7 Amy Sex F
## 8 Bill Sex M
## 9 Cathy Sex F
# Reshape long -> wide: one column per distinct `variable`, filled from `value`.
# pivot_wider() replaces the superseded spread(); as.data.frame() keeps DF2 a
# plain data.frame so it prints like the recorded output below.
(DF2 <- as.data.frame(
  tidyr::pivot_wider(DF1, names_from = variable, values_from = value)
))
## name Age Salary Sex
## 1 Amy 21 21000 F
## 2 Bill 32 32000 M
## 3 Cathy 41 41000 F
# Build the vector, show it, then show the total of its elements.
x <- c(2, 4, 6, 8, 10)
x
sum(x)
## [1] 2 4 6 8 10
## [1] 30
trunc takes a single numeric argument x and returns a numeric vector containing the integers formed by truncating the values in x toward 0.
# Truncation drops the fractional part, moving each value toward zero:
# negative values move up, positive values move down.
x <- c(-2.5, -1.5, 0, 1.5, 2.5)
trunc(x)
## [1] -2 -1 0 1 2
# Ten-row demo frame: `class` is a random draw of ten distinct lowercase
# letters (so it differs between runs) and `weight` alternates 1, 2.
df <- data.frame(
  id = seq_len(10),
  class = sample(letters, size = 10),
  weight = rep(1:2, times = 5)
)
df
## id class weight
## 1 1 r 1
## 2 2 l 2
## 3 3 c 1
## 4 4 e 2
## 5 5 i 1
## 6 6 v 2
## 7 7 z 1
## 8 8 q 2
## 9 9 n 1
## 10 10 s 2
# Repeat each row of df as many times as its `weight`; the weight column
# itself does not appear in the result.
tidyr::uncount(df, weights = weight)
## id class
## 1 1 r
## 2 2 l
## 3 2 l
## 4 3 c
## 5 4 e
## 6 4 e
## 7 5 i
## 8 6 v
## 9 6 v
## 10 7 z
## 11 8 q
## 12 8 q
## 13 9 n
## 14 10 s
## 15 10 s
# Set union: each distinct value of x and y appears once.
x <- c(1, 2, 3)
y <- c(2, 3, 4)
union(x, y)
## [1] 1 2 3 4
# Position of the letter "R" in the alphabet.
which(LETTERS == "R")
## [1] 18
# Positions of every letter except "R".
which(LETTERS != "R")
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 19 20 21 22 23 24 25 26
# The NA at position 4 is not reported.
ll <- c(TRUE, FALSE, TRUE, NA, FALSE, FALSE, TRUE)
which(ll) #> 1 3 7
## [1] 1 3 7
names(ll) <- letters[seq_along(ll)]
ll
## a b c d e f g
## TRUE FALSE TRUE NA FALSE FALSE TRUE
# On a named vector the result carries the names of the TRUE elements.
which(ll)
## a c g
## 1 3 7
which((1:12) %% 2 == 0) # which are even?
## [1] 2 4 6 8 10 12
# On a plain vector, arr.ind = TRUE still yields ordinary positions.
which(1:10 > 3, arr.ind = TRUE)
## [1] 4 5 6 7 8 9 10
# 3 x 4 matrix holding 1..12, filled column by column.
mM <- matrix(1:12, nrow = 3, ncol = 4)
mM
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
# Logical mask of the entries divisible by 3.
div.3 <- (mM %% 3) == 0
# Without arr.ind: positions counted down the columns.
which(div.3)
## [1] 3 6 9 12
# With arr.ind: one (row, col) pair per match.
which(div.3, arr.ind = TRUE)
## row col
## [1,] 3 1
## [2,] 3 2
## [3,] 3 3
## [4,] 3 4
rownames(mM) <- paste("Case", 1:3, sep = "_")
# The matrix row names label the rows of the arr.ind result.
which((mM %% 5) == 0, arr.ind = TRUE)
## row col
## Case_2 2 2
## Case_1 1 4
# Reinterpret the same twelve values as a 2 x 2 x 3 array, then display it.
dim(mM) <- c(2, 2, 3)
mM
## , , 1
##
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
##
## , , 2
##
## [,1] [,2]
## [1,] 5 7
## [2,] 6 8
##
## , , 3
##
## [,1] [,2]
## [1,] 9 11
## [2,] 10 12
# div.3 was computed from the 3 x 4 matrix, so it no longer describes mM once
# mM has been reshaped to 2 x 2 x 3; recompute the mask on the array itself.
which(mM %% 3 == 0, arr.ind = FALSE)
## [1] 3 6 9 12
# On a 3-d array, arr.ind = TRUE gives one index column per dimension.
which(mM %% 3 == 0, arr.ind = TRUE)
## dim1 dim2 dim3
## [1,] 1 2 1
## [2,] 2 1 2
## [3,] 1 1 3
## [4,] 2 2 3
# Flatten the array to a plain vector and display it.
vm <- c(mM)
vm
## [1] 1 2 3 4 5 6 7 8 9 10 11 12
dim(vm) <- length(vm) #-- funny thing with length(dim(...)) == 1
# Query vm itself so the one-dimensional case is actually exercised:
# arr.ind = TRUE on a 1-d array returns a one-column index matrix.
which(vm %% 3 == 0, arr.ind = TRUE)
## dim1
## [1,] 3
## [2,] 6
## [3,] 9
## [4,] 12
# Row positions of the cars with the most gears.
with(mtcars, which(gear == max(gear)))
## [1] 27 28 29 30 31
# Row positions of the cars with the fewest gears.
with(mtcars, which(gear == min(gear)))
## [1] 4 5 6 7 12 13 14 15 16 17 21 22 23 24 25
# Row positions of the cars with exactly four gears.
with(mtcars, which(gear == 4))
## [1] 1 2 3 8 9 10 11 18 19 20 26 32
# References:
# https://g4greetz.wordpress.com/2017/03/03/which-function-in-r/
# xor() keeps rows where exactly one condition holds: either mpg is above 15
# or the car has fewer than 5 gears, but not both.
mtcars %>%
  tibble::rownames_to_column("car_name") %>%
  select(car_name, mpg, disp, gear) %>%
  filter(
    xor(gear < 5, mpg > 15)
  )
## car_name mpg disp gear
## 1 Duster 360 14.3 360.0 3
## 2 Cadillac Fleetwood 10.4 472.0 3
## 3 Lincoln Continental 10.4 460.0 3
## 4 Chrysler Imperial 14.7 440.0 3
## 5 Camaro Z28 13.3 350.0 3
## 6 Porsche 914-2 26.0 120.3 5
## 7 Lotus Europa 30.4 95.1 5
## 8 Ford Pantera L 15.8 351.0 5
## 9 Ferrari Dino 19.7 145.0 5
# Delete rows by position: drop the first three rows from a copy of mtcars,
# showing the head of the data before and after.
df1 <- mtcars
head(df1)
df1 <- df1[-seq_len(3), ]
head(df1)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## mpg cyl disp hp drat wt qsec vs am gear carb
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
# Delete rows conditionally: drop every row of a copy of mtcars where cyl is 8,
# showing the head of the data before and after.
df1 <- mtcars
head(df1)
df1 <- df1[df1$cyl != 8, ]
head(df1)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2