Robert Batzinger
10 Feb 2017
(or 5 things you should know about R)
# Ordinary scatter plot of y against x (n = 10000)
plot(x = x, y = y)
# Density-smoothed scatter plot of the same points (n = 10000)
smoothScatter(x = x, y = y)
# Scores of three students on five subjects, laid out for a radar chart.
# radarchart() (fmsb package, assumed to be attached already) expects a data
# frame of numeric columns whose first row holds the maximum of every axis and
# whose second row holds the minimum; the remaining rows are the series drawn.
# check.names = FALSE keeps the "R-code" column label from being mangled.
data <- data.frame(
  math     = c(20, 0,  3, 19,  9),
  engl     = c(20, 0,  9,  4, 15),
  biol     = c(20, 0,  5, 12, 16),
  music    = c(20, 0, 20,  6,  2),
  `R-code` = c(20, 0, 15,  2, 18),
  row.names = c("MAX", "MIN", "mr-a", "mr-b", "mr-c"),
  check.names = FALSE
)
# Axis labels 0, 5, ..., 20 match the MIN/MAX rows above.
radarchart(data,
  axistype = 1,
  caxislabels = seq(0, 20, 5),
  vlcex = 0.8
)
Page User Data Matrix
Rg Fn Lb Bu Rs Hm
guest 0 0 1 0 1 5
appl 4 2 0 0 0 2
stud 8 4 2 0 0 2
staff 1 6 2 4 1 3
instr 8 4 4 2 4 2
admin 4 6 1 6 2 3
# Draw the page-by-user matrix as a chord diagram.
chordDiagram(x = pgUsrDatMx)
data 15
statistics 5
probability 3 ...
library(wordcloud2)
# Words whose second-column value exceeds 13 are drawn in red,
# all others in sky blue.
wordcloud2(
  data = wrddat,
  color = ifelse(wrddat[, 2] > 13, "red", "skyblue")
)
# Fill the listed Southeast Asian countries on a world map.
# library() is used rather than require(): a missing package then stops the
# script here with a clear error instead of returning FALSE and failing
# later inside map().
library(maps)
library(mapdata)
countries <- c(
  "thailand", "malaysia",
  "indonesia", "philippines",
  "singapore", "myanmar",
  "laos", "vietnam",
  "cambodia"
)
# Palette of 128 rainbow colours handed to map() as the fill colours.
colors <- rainbow(128)
map("world",
  regions = countries,
  fill = TRUE, col = colors
)
Provides different political boundary maps
Choose specific columns and check them for correlation
# Keep columns 1 and 3-6 of mtcars (mpg, disp, hp, drat, wt).
# The built-in `cars` data set has only two columns (speed, dist), so the
# same subset on it fails; mtcars is the data set that carries these fields.
cardat <- mtcars[, c(1, 3:6)]
# Scatter-plot matrix of every pair of the selected columns.
pairs(cardat)
library(corrgram)
# Correlogram of cardat with variables reordered (order = TRUE):
# shaded cells below the diagonal, pie glyphs above it,
# variable names on the diagonal.
corrgram(
  cardat,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Car Milage Data"
)
# Linear regression of mpg on wt, hp, drat and disp, followed by the
# coefficient table and fit statistics.
model <- lm(
  mpg ~ wt + hp + drat + disp,
  data = cardat
)
summary(model)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
Intrcpt 29.148738 6.293588 4.631 8.2e-05 ***
wt -3.479668 1.078371 -3.227 0.00327 **
hp -0.034784 0.011597 -2.999 0.00576 **
drat 1.768049 1.319779 1.340 0.19153
disp 0.003815 0.010805 0.353 0.72675
(prob): 0 *** 0.001 ** 0.01 * 0.05 . 0.1 ‘ ’ 1
Residuals:
Min 1Q Median 3Q Max
-3.5077 -1.9052 -0.5057 0.9821 5.6883
Residual standard error: 2.602 on 27 DF
Multiple R-squared: 0.8376
Adjusted R-squared: 0.8136
F-statistic: 34.82 on 4 and 27 DF
(p-value: 2.704e-10)
# Analysis of variance for the same formula (terms entered in the order
# wt, hp, drat, disp), followed by the ANOVA table.
fit <- aov(
  mpg ~ wt + hp + drat + disp,
  data = cardat
)
summary(fit)
Df Sum Sq Mean Sq F value Pr(>F)
wt 1 847.7 847.7 125.185 1.21e-11 ***
hp 1 83.3 83.3 12.297 0.00161 **
drat 1 11.4 11.4 1.678 0.20610
disp 1 0.8 0.8 0.125 0.72675
Residuals 27 182.8 6.8
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Sample data:
auto mpg cyl disp hp drat wt qsec vs am gear
---------------------------------------------
MazRX4 21.0 6 160 110 3.90 2.6 16.46 0 1 4
Dat710 22.8 4 108 93 3.85 2.3 18.61 1 1 4
R-code:
# heatmap() needs a numeric matrix, so convert the mtcars data frame first.
cmpdata <- as.matrix(mtcars)
# scale = "column" standardises each column before colouring.
heatmap(x = cmpdata, scale = "column")
Reorders the dataset to minimize the differences between neighboring cars and attributes
library(rpart)
# Recursive-partitioning tree for mpg using wt, hp, drat and disp.
pfit <- rpart(
  mpg ~ wt + hp + drat + disp,
  data = cardat
)
summary(pfit)
# Draw the tree, then label it; use.n = TRUE adds observation counts
# to the node labels.
plot(x = pfit, compress = TRUE)
text(x = pfit, use.n = TRUE)
# Holt-Winters fit of the tourist series, then a 12-step-ahead forecast
# with an 80% prediction interval.
model <- HoltWinters(x = tourist)
p <- predict(
  model,
  n.ahead = 12,
  prediction.interval = TRUE,
  level = 0.80
)
# Seasonal ARIMA(3,1,1)(2,1,1) with period 4 fitted to the tourist series,
# then a 12-step-ahead forecast.
m <- arima(
  x = tourist,
  order = c(3, 1, 1),
  seasonal = list(order = c(2, 1, 1), period = 4)
)
pp <- predict(m, n.ahead = 12)
A prize is hidden behind one door
Which door should the contestant choose?
Make a list of 10,000 preferred orders for selecting the three doors.
R-code:
# One row per contestant: an independently shuffled preference order over
# doors 1..3 (c1 = first choice, c2 and c3 = alternatives).
# sample(3) must be re-drawn for every contestant; repeating a single draw
# with rep() only ever yields the three rotations of that one permutation.
# replicate() returns a 3 x 10000 matrix (one permutation per column), so it
# is transposed to put each permutation on its own row.
dat <- t(replicate(10000, sample(3)))
colnames(dat) <- c("c1", "c2", "c3")
First 5 entries:
c1 c2 c3
[1,] 3 1 2
[2,] 1 2 3
[3,] 2 3 1
[4,] 3 1 2
[5,] 1 2 3
# tally[i, j] = number of rows of dat whose i-th choice is door j.
tally <- matrix(0, nrow = 3, ncol = 3)
for (door in 1:3) {
  # colSums over the logical matrix counts, per choice column, the rows
  # that picked this door.
  tally[, door] <- colSums(dat == door)
}
# One stacked bar per door; the segments are the three choice positions.
barplot(
  tally,
  col = c("red", "brown", "green"),
  names.arg = 1:3,
  xlab = "Preferences",
  main = "Tally of Doors"
)
R-code:
# Door hiding the prize in each of the 10,000 games: a uniform draw on
# (0, 3) rounded up to 1, 2 or 3.
prizeloc <- ceiling(runif(n = 10000, min = 0, max = 3))
First 20 entries:
[1] 3 2 3 1 1 1 1 1 2 2 1 2 3 1 3 2 1 1 2 3
Tally the number of times that the winning door was selected, either as the first choice or as one of the alternative choices.
# wins[1]: games where the first choice is the prize door.
# wins[2]: games where the prize door is the second or third choice
#          (prizeloc is recycled down each of the two columns compared).
wins <- c(
  sum(dat[, 1] == prizeloc),
  sum(dat[, 2:3] == prizeloc)
)
... to be continued later.