“a freely available language and environment for statistical computing and graphics which provides a wide variety of statistical and graphical techniques: linear and nonlinear modelling, statistical tests, time series analysis, classification, clustering, etc”
Add `--internet2` to the end of the Target string on the Shortcut tab, then click OK.
https://www.rstudio.com/products/rstudio/download/
# Build a numeric vector of tree heights with c() and print it.
heights <- c(4.7, 5.3, 6.2, 3.8, 4.4, 7.1, 2.5)
heights
## [1] 4.7 5.3 6.2 3.8 4.4 7.1 2.5

# Summary functions work on the whole vector at once.
length(heights)
## [1] 7
sum(heights)
## [1] 34
mean(heights)
## [1] 4.857143
maxHeight <- max(heights)
maxHeight
## [1] 7.1

# Indexing is 1-based: a single position, a range, or any vector of positions
# (positions may be repeated and may appear in any order).
heights
## [1] 4.7 5.3 6.2 3.8 4.4 7.1 2.5
heights[5]
## [1] 4.4
heights[3:6]
## [1] 6.2 3.8 4.4 7.1
heights[c(3, 6, 1)]
## [1] 6.2 7.1 4.7
heights[c(1, 1, 2, 1, 6)]
## [1] 4.7 4.7 5.3 4.7 7.1

# Negative indices drop the given positions instead of selecting them.
heights
## [1] 4.7 5.3 6.2 3.8 4.4 7.1 2.5
heights[c(-1, -4)]
## [1] 5.3 6.2 4.4 7.1 2.5

# A comparison yields a logical vector; which() gives the TRUE positions, and
# the logical vector itself can be used as an index to filter.
heights > 6
## [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE
which(heights > 6)
## [1] 3 6
heights[heights > 6]
## [1] 6.2 7.1

# if/else takes a single (scalar) condition.
if (heights[1] > 6) cat("Large\n") else cat("Small\n")
## Small
# Can spread this over lines; braces with `} else {` on one line keep the
# `else` attached to its `if` wherever the code is pasted.
if (heights[6] > 6) {
  cat("Large\n")
} else {
  cat("Small\n")
}
## Large
# Can do multiple instructions
if (heights[1] < 6) {
  cat("Number is very small\n")
  cat("Here are all the heights in order", sort(heights))
}
## Number is very small
## Here are all the heights in order 2.5 3.8 4.4 4.7 5.3 6.2 7.1

# Visit every element by position. seq_len(N) is empty when N is 0, whereas
# 1:N would count down through 1 and 0 and index past the end of the vector.
N <- length(heights)
for (i in seq_len(N)) {
  if (heights[i] > 6) {
    cat(" Tree", i, "- tall.")
  } else {
    cat(" Tree", i, "- short.")
  }
}
## Tree 1 - short. Tree 2 - short. Tree 3 - tall. Tree 4 - short. Tree 5 - short. Tree 6 - tall. Tree 7 - short.
Deterministic loop - you know in advance how many cycles (N here)
# Keep doubling z until it passes 100, counting how many doublings that takes.
# The number of iterations depends on the values, not on a preset count.
count <- 0
z <- 1
while (z <= 100) {
  z <- z * 2
  count <- count + 1
  cat(z, ' ')
}
## 2 4 8 16 32 64 128
cat('\n It took', count, ' doubles to exceed 100.')
##
## It took 7 doubles to exceed 100.
Non-deterministic loop - you don’t know in advance how many cycles
# Nominal factor: categories with no inherent order. With no `levels`
# argument the levels are the sorted unique values.
x <- c("MacOS", "Linux", "Windows", "Windows", "MacOS")
xf <- factor(x)
xf # Nominal
## [1] MacOS Linux Windows Windows MacOS
## Levels: Linux MacOS Windows

# Ordinal factor: `levels` lists the categories from lowest to highest and
# `ordered = TRUE` makes that ordering usable in comparisons.
y <- c("good", "very good", "good", "average", "poor", "very poor", "average")
yf <- factor(
  y,
  levels = c("very poor", "poor", "average", "good", "very good"),
  ordered = TRUE
)
yf # Ordinal
## [1] good very good good average poor very poor average
## Levels: very poor < poor < average < good < very good

# Compare two elements of the ordered factor ("good" vs "good").
yf[1] > yf[3]
## [1] FALSE

# The same comparison on an unordered factor is not defined: R warns and
# returns NA.
xf[1] > xf[3]
## Warning in Ops.factor(xf[1], xf[3]): '>' not meaningful for factors
## [1] NA
# Show the first six rows of the built-in mtcars data frame.
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
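`mtcars` is a built-in data frame. Note that it holds different kinds of data: character (the car names, kept as row names), real-valued measurements (e.g. `mpg`, `wt`) and integer-valued counts (e.g. `cyl`, `gear`). We can use some simple exploratory visualisations…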
# Distribution of mpg over all cars: a histogram, then a single box plot.
hist(mtcars$mpg, main = "Fuel Consumption")
boxplot(mtcars$mpg, main = "Fuel Consumption")

# One box per cylinder count; the groups run along the bottom axis.
boxplot(mpg ~ cyl, data = mtcars, main = "Fuel Consumption", xlab = "Cylinders")

# Horizontal layout: the groups now run up the side axis and the bottom axis
# carries the mpg values, so the "Cylinders" label belongs on ylab, not xlab.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  main = "Fuel Consumption",
  ylab = "Cylinders",
  horizontal = TRUE
)
Note that `boxplot` also flags outliers.
# Scatter plot of fuel economy (y) against car weight (x).
plot(
  x = mtcars$wt,
  y = mtcars$mpg,
  main = "MPG vs Weight"
)
Heavier cars have poorer fuel consumption