#########################################
##### Notes for R programming 3/8/17 ####
#########################################
# Control structure
# if, else if, else: exactly one of the branches runs
a <- 1
if (a < 1) {
  print("a <1")
} else if (a > 1) {
  print("a > 1")
} else {
  print("a == 1")
}

# logical operators: || is the scalar "or" meant for use inside if()
b <- 2
if (a == 0 || b == 2) {
  print("a or b")
}

# working with vectors
m <- c(1, 2, 3)
n <- c(6, 5, 4)
# &  is vectorized: it compares the vectors element by element
# && is meant for single (scalar) values, not for vectors
m < 2 & n > 5

# a for loop: `a` takes each value of m in turn
for (a in m) {
  print(a)
}

# repeat loop, needs break (it has no condition of its own)
repeat {
  b <- b + 1
  if (b == 10) {
    break
  }
}
b

# while loop: counts b back down to 0
while (b > 0) {
  b <- b - 1
}
# apply function: run a function over the rows (1) or columns (2) of a matrix
m <- matrix(c(1, 2, 4, 2), nrow = 2, ncol = 2)
m
apply(m, MARGIN = 1, FUN = sum)  # sum of rows
apply(m, MARGIN = 2, FUN = sum)  # sum of the columns
apply(m, MARGIN = 2, FUN = sort) # sort columns
# data types
# double
x <- 3.14
typeof(x)

# integer
y <- as.integer(3)
typeof(y)

# complex
z <- 3 + 2i
typeof(z)

# boolean/logical
b <- x == y
typeof(b)

# mixing types in arithmetic: the result takes the more general type
typeof(x + b)
typeof(y + z)
typeof(x + z)

# character objects (strings)
s1 <- "some set of characters, i.e. a string"
typeof(s1)
s2 <- "more string"
paste(s1, s2)
# vectors and matrices
# basic vector
v <- c(7, 10, 4, 3)
v

# scalar is a 1-element vector
y <- 2

# new vector from existing vectors
z <- c(y, y * 2, y + 1, 0, v)
z

# operation vectors: the scalar is applied to every element
a <- v * y
a

# different size vectors: the shorter one is recycled to the longer length
a <- c(1, 2)
b <- c(1, 2, 3, 4)
d <- a + b
d
sqrt(d)

# sequence and repeat
v <- 1:10
v
v <- seq(from = 5, to = 100, by = 5)
v
v <- rep(1:5, times = 5)
v
# matrices
# matrix using matrix(...): values fill the matrix column by column
m <- matrix(1:4, nrow = 2, ncol = 2)
m
# matrix using matrix(..., byrow = TRUE): values fill the matrix row by row
m <- matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE)
m
# matrix using dim: giving a vector dimensions turns it into a matrix
v <- 1:4
dim(v) <- c(2, 2)
v
# column and row bind
cbind(c(1, 2), c(3, 4))
rbind(c(3, 4), c(5, 6))
# operation on a matrix: the scalar is applied to every element
m * 2
# multiplying 2 matrices
# `*` multiplies element by element; `%*%` is the true matrix product
m * m
m %*% m
# creating diagonal matrix
v <- c(1, 2, 3)
diag(v)
# generating random numbers
runif(1) # new number each time
runif(1, min = 1, max = 10)
sample(1:10, 10)
sample(1:10, 10, replace = TRUE) # repeats num, default does not
# shuffle a 52-card deck; called `deck` so that the stats function sd() is
# not shadowed by a variable of the same name
deck <- sample(1:52, 52)
deck[1:5]
rnorm(100, mean = 0, sd = 2)
plot(rnorm(50000, mean = 0, sd = 2))
# dnorm = density function
# pnorm = probability function
# qnorm = quantile function
# rbinom = random binom dist (also pbinom, qbinom, dbinom)
rbinom(100, size = 1, prob = 0.5)
mean(rbinom(100, size = 1, prob = 0.5))
# debugging
# increment by 1
#   a: numeric value to increase
# Returns a + 1. The sum is the last expression (not an assignment), so the
# result is returned visibly and prints when inc() is called at the console.
inc <- function(a) {
  a + 1
}
# decrement by 1
#   a: numeric value to decrease
# Returns a - 1. The difference is the last expression (not an assignment), so
# the result is returned visibly and prints when dec() is called at the console.
dec <- function(a) {
  a - 1
}
# reciprocate
#   a: a single number
# Returns 1 / a, visibly. Stops with an error when a is 0 instead of
# letting the division quietly produce Inf.
recip <- function(a) {
  if (a == 0) {
    stop("division by zero will occur")
  }
  1 / a
}
# product of the reciprocals of (a + 1) and (a - 1)
#   a: a single number; recip() stops with an error when a + 1 or a - 1 is 0
myfunc <- function(a) {
  recip(inc(a)) * recip(dec(a))
}
myfunc(10)
# myfunc(1) fails on purpose: dec(1) is 0, so recip() stops with an error.
# try() still reports the error but lets the rest of the script run when the
# file is sourced as a whole.
try(myfunc(1))
# debug() makes the next call of myfunc() step through its body line by line;
# undebug() switches that off again
debug(myfunc)
undebug(myfunc)
#################
# Data Handling #
#################
# scan function
# scan() with no arguments reads values typed at the console until an empty
# line is entered. The text= argument supplies the same input inline so the
# script also parses and runs when sourced.
# NOTE(review): the input values below are reconstructed from the console
# transcript in the original notes - confirm they match what was intended.
scan(text = "1 2 3 4 5 6")
v <- scan(text = "1 2 3 4 5")
# values are separated by white space by default, so input such as
# 1;2;3;4;5 is rejected unless the separator is given explicitly
scan(text = "1;2;3;4;5;6", sep = ";")
# n = 3 stops reading after the first three values
scan(text = "1 2 3 4 5 6 7 8 9", n = 3)
# cat function
# cat() joins its arguments with spaces, so the variable has to be passed as
# its own argument, outside the quotes, to have its value printed
a <- 5 # example age; any single value works
cat("my daughter is", a, "years old\n")
# paste() is vectorized: it pairs each letter with the matching number
paste(letters, 1:26)
# importing data
# read.csv() returns a data frame; example.csv is looked up in the current
# working directory
d <- read.csv("example.csv")
d$name
mean(d$height)
# importing from excel
# Perl is not an R package, so install.packages() cannot provide it; it has
# to be installed separately from its own website.
# gdata is installed only when it is missing, and library() is used instead of
# require() so that a failed load stops with an error right away.
if (!requireNamespace("gdata", quietly = TRUE)) {
  install.packages("gdata")
}
library(gdata)
# missing values
v <- c(1, 2, 3, NA, 5, NA, 7)
is.na(v)                  # TRUE where a value is missing
any(is.na(v))             # is anything missing at all?
mean(v, na.rm = TRUE)     # drop the NAs before averaging
summary(v)                # reports the number of NAs as well
sort(v, na.last = TRUE)   # keep the NAs and put them at the end
# recode a sentinel value (-1) as missing
w <- c(1, 2, 3, -1, 7, -1, -1)
w
w[w == -1] <- NA
w
#########################
# basic stats #
########################
# built-in data sets
data()             # list the data sets that are currently available
library("MASS")    # might need to install
data()             # the list now includes the MASS data sets
data("phones")
phones
example("hist")
# mode function
#   x: vector of values to tabulate
# Returns the first entry of the frequency table of x after sorting it by
# decreasing count: a one-element table whose name is the most frequent value
# and whose value is how often it occurs.
MyMode <- function(x) {
  sort(table(x), decreasing = TRUE)[1]
}
data(presidents)
x <- presidents
m <- MyMode(x)
m
as.numeric(m)         # how many times the mode occurs
as.numeric(names(m))  # the modal value itself
# the same computation written out inline
sort(table(x), decreasing = TRUE)[1]
# compute median absolute deviation
g <- trees$Girth
median(g)
mad(g)
mean(g)
mad(g, center = median(g))  # same as mad(g): the median is the default center
mad(g, center = mean(g))    # deviations measured from the mean instead
sd(trees$Girth)
mad(g, constant = 1.49) # scaling
# with missing data
g <- c(g, NA)
mad(g, na.rm = TRUE)
# cov & cor
head(faithful)
plot(faithful$eruptions, faithful$waiting)
cor(faithful$eruptions, faithful$waiting)  # correlation
cov(faithful$eruptions, faithful$waiting)  # covariance
# rank-based (Kendall) correlation
cor(
  faithful$eruptions, faithful$waiting,
  method = "kendall", use = "everything"
)
# table functions
head(airquality)
# useNA = "ifany" adds an NA level only where missing values occur;
# useNA = "always" adds it for every dimension
table(OzHi = airquality$Ozone > 80, airquality$Month, useNA = "ifany")
table(OzHi = airquality$Ozone > 80, airquality$Month, useNA = "always")
library(MASS)
head(survey)
# with() looks the column names up inside `survey` for one call only, so the
# search path is not changed the way attach() would change it
with(survey, table(Smoke))
with(survey, table(Smoke, Sex, useNA = "always"))
#################
# visualizations#
#################
# browser names and their shares; the three vectors are matched up by position
browsers <- c("Chrome", "Internet Explorer", "Firefox",
              "Safari", "Opera", "other")
share <- c(38, 19, 16.8, 16, 3.2, 6)
colors <- c("red", "yellow", "blue", "green", "orange", "cyan")
pie(share, labels = browsers, col = colors)
# vertical bars
barplot(share, names.arg = browsers, col = colors, ylim = c(0, 40))
# horizontal bars: the values run along the x axis, hence xlim
barplot(share, names.arg = browsers, col = colors, xlim = c(0, 40),
        horiz = TRUE)
# boxplot() gets `morley` through its data argument, so the data frame does
# not need to be attached to the search path
boxplot(Speed ~ Expt, data = morley, xlab = "Exp No.", ylab = "Speed")
abline(h = 792.458, col = "red")
text(3, 792.458, "true/speed")
# freq = FALSE draws densities rather than counts, so that a density curve
# can be overlaid on the same scale
hist(airquality$Temp, freq = FALSE, right = TRUE, col = colors, main = "hist",
     xlab = "temp", breaks = 15)
# col and lwd are arguments of lines(), not of density(): passed to density()
# they are silently ignored and the curve comes out thin and black
lines(density(airquality$Temp, bw = 1.0), col = "red", lwd = 3)
# line plots
v <- sample(1:100, 10)
# type = "o" overplots points and lines; "l" (lines) and "b" (both) also work
plot(v, type = "o", col = "blue", ylim = c(0, 100))
x <- sample(1:100, 10)
# lines() adds a new line to the previous graph instead of starting a new one
lines(x, type = "o", col = "red")
title(main = "main title", col.main = "blue")
# scatterplot
x <- airquality$Ozone
y <- airquality$Wind
plot(x, y, xlab = "Ozone", ylab = "wind")
# overlay the fitted straight line from a linear regression of y on x
abline(lm(y ~ x))
library(ggplot2)
# aes() looks column names up in the data frame given to ggplot(), so the
# columns are named directly instead of being extracted with `$`
sp <- ggplot(airquality, aes(x = Ozone, y = Wind))
# hollow circles (shape 1) plus a fitted linear-regression line
sp + geom_point(shape = 1) + geom_smooth(method = lm)
## Error: <text>:218:9: unexpected string constant
## 217:
## 218: scan(sep""
## ^