This is the CODE-ONLY cribsheet for the Novice workshop. The full document will be provided during and after the workshop.
– 6:30-6:35pm: A. RStudio - Basics
– 6:35-6:40pm: B. Pre-loaded data - Examine
– 6:40-6:50pm: C. Make your own data - Geo Chart
– 6:50-6:55pm: D. ggplot2 - Set-up
– 6:55-7:15pm: E. ggplot2 - Data Viz + Extensions
– 7:15-7:30pm: BREAK
– 7:30-8:15pm: F. Machine Learning technique - Clustering + Extension
# Store the result of 3 + 5 in a variable called `object`.
object <- 3 + 5
# Typing a variable's name on its own prints its value to the console.
object
## [1] 8
# List the data sets available in the currently attached packages.
data()
# Load individual built-in data sets into the workspace by name.
data("iris")
data("mtcars")
data("longley")
data("USArrests")
data("VADeaths")
# Functions for examining a data set. NOTE(review): the parentheses are left
# empty here, presumably as placeholders -- put a data set name inside before
# running, e.g. class(iris).
# class(): the type of object (data.frame, matrix, ...).
class()
# dim(): number of rows and columns.
dim()
# str(): compact overview of the structure, one line per column.
str()
# summary(): summary statistics for each column.
summary()
# head(): the first few rows.
head()
# tail(): the last few rows.
tail()
# View(): open the data in a spreadsheet-style viewer.
View()
# Open the help page for a data set. The text in angle brackets is a
# placeholder, not valid R -- replace it with a real name, e.g. ?iris
?<name of data set>
# Build two vectors that will become the columns of the data set.
Country <- c("United Kingdom", "France", "Spain", "Germany", "US", "Australia", "Thailand" )
# NOTE(review): Popularity is left empty here -- presumably to be filled in
# with one number per entry in Country (7 values); TODO confirm. Until then
# geodata has no usable Popularity column for the chart below.
Popularity <- c()
# Combine the vectors into a data frame and inspect it.
geodata <- data.frame(Country, Popularity)
View(geodata)
class(geodata)
# Install googleVis, attach it for this session, then look up the help page
# and the argument list of gvisGeoChart().
install.packages("googleVis")
library(googleVis)
?gvisGeoChart
args(gvisGeoChart)
# Build the geo chart: locationvar names the column holding the locations,
# colorvar names the column used to colour them.
geochart <- gvisGeoChart(geodata,
locationvar = "Country",
colorvar = "Popularity")
# Display the chart.
plot(geochart)
# Set-up: install ggplot2, attach it for this session, and open the help
# page for its main function.
install.packages("ggplot2")
library(ggplot2)
?ggplot

# Load the two example data sets used for the charts below.
data("diamonds", "economics")

# Each chart is a ggplot() call that names the data and the aesthetic
# mapping, followed by a geom layer that decides how the data is drawn.

# Line graph: pop against date.
line.graph <- ggplot(data = economics, mapping = aes(x = date, y = pop)) +
  geom_line()
plot(line.graph)

# Bar chart: number of diamonds in each cut.
bar.chart <- ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar()
plot(bar.chart)

# Box plot: distribution of price within each cut.
box.plot <- ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  geom_boxplot()
plot(box.plot)

# Scatterplot: carat against price.
scatterplot <- ggplot(data = diamonds, mapping = aes(x = price, y = carat)) +
  geom_point()
plot(scatterplot)
–
# Titles and axis labels. These lines are fragments, not standalone
# statements: append them to the end of a plot definition with `+`.
+ ggtitle("insert title here")
+ xlab("insert x-axis label here")
+ ylab("insert y-axis label here")
–
# Colours. Also fragments: `colour` sets the outline/line colour and `fill`
# sets the interior colour. Presumably each one replaces the plain
# geom_line() / geom_bar() / geom_boxplot() layer in the plots above.
+ geom_line(colour = "tomato")
+ geom_bar(colour = "slategrey", fill = "peachpuff")
+ geom_boxplot(colour = "navy", fill = "oldlace")
–
# Subsets: mapping a further variable inside the geom's own aes() splits the
# drawing by that variable.

# Bars filled by clarity.
bar.chart.subset <- ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar(mapping = aes(fill = clarity))
plot(bar.chart.subset)

# Points coloured by cut.
scatterplot.subset <- ggplot(data = diamonds, mapping = aes(x = price, y = carat)) +
  geom_point(mapping = aes(colour = cut))
plot(scatterplot.subset)
–
# Facets: facet_grid(color ~ .) draws one panel row per value of color.
# Two versions are given. Both assign to the same name, so the second
# (bars also filled by color) replaces the first before plot() is called.
bar.chart.facet <- ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar() +
  facet_grid(color ~ .)
bar.chart.facet <- ggplot(data = diamonds, mapping = aes(x = cut)) +
  geom_bar(mapping = aes(fill = color)) +
  facet_grid(color ~ .)
plot(bar.chart.facet)
Steps:
(i) Load & Explore data
(ii) Clean data
(iii) Standardise data
(iv) Calculate Euclidean Distance Matrix
(v) Calculate Clusters
(vi) Make Dendrogram data viz
(vii) Define appropriate no. of Clusters
(viii) Variable combinations determining Cluster Membership
(ix) Create final Cluster Membership data
# (i) Load & explore: bring mtcars into the workspace, browse it in the
# viewer, and read its help page.
data("mtcars")
View(mtcars)
help("mtcars")

# (ii) Clean: drop the 8th and 9th columns (vs and am) and check the result.
mtcars1 <- mtcars[, -(8:9)]
View(mtcars1)
# (iii) Standardise: for every column (MARGIN = 2) take the median and the
# median absolute deviation, then centre each column on its median and
# divide it by its MAD so the variables are on a comparable scale.
medians <- apply(X = mtcars1, MARGIN = 2, FUN = median)
mads <- apply(X = mtcars1, MARGIN = 2, FUN = mad)
mtcars2 <- scale(x = mtcars1, center = medians, scale = mads)
print(x = mtcars2, digits = 2)
## mpg cyl disp hp drat wt qsec gear carb
## Mazda RX4 0.333 0.00 -0.26 -0.17 0.29 -0.92 -0.88 0.00 1.35
## Mazda RX4 Wag 0.333 0.00 -0.26 -0.17 0.29 -0.59 -0.49 0.00 1.35
## Datsun 710 0.665 -0.67 -0.63 -0.39 0.22 -1.31 0.64 0.00 -0.67
## Hornet 4 Drive 0.407 0.00 0.44 -0.17 -0.87 -0.14 1.22 -0.67 -0.67
## Hornet Sportabout -0.092 0.67 1.17 0.67 -0.77 0.15 -0.49 -0.67 0.00
# (iv) Euclidean distance between every pair of rows of the standardised data.
mtcars3 <- dist(x = mtcars2, method = "euclidean")
print(x = mtcars3, digits = 2)

# (v) Hierarchical clustering on those distances using the "ward.D2" method.
clusters <- hclust(d = mtcars3, method = "ward.D2")

# (vi) Dendrogram: first with default label placement, then with hang = -1
# so that all labels hang down from the same baseline.
plot(x = clusters)
plot(x = clusters, hang = -1)

# (vii) Draw rectangles on the dendrogram around a 6-cluster solution.
rect.hclust(tree = clusters, k = 6)
# Cut the tree into 6 groups: one cluster number per row of the data.
clusters.6 <- cutree(tree = clusters, k = 6)

# (viii) Mean of every standardised variable within each cluster.
means <- aggregate(x = mtcars2, by = list(clusters.6), FUN = mean)
# Print numbers with 2 significant digits from here on (session-wide option).
options(digits = 2)
means
## Group.1 mpg cyl disp hp drat wt qsec gear carb
## 1 1 0.10 0.00 -0.26 0.067 0.223 -0.39 -0.335 0.13 1.62
## 2 2 0.48 -0.48 -0.26 -0.352 -0.153 -0.50 1.595 -0.29 -0.39
## 3 3 -0.45 0.67 0.85 0.610 -0.916 0.47 -0.275 -0.67 0.29
## 4 4 -0.85 0.67 1.03 1.936 -0.028 0.28 -1.852 0.00 2.02
## 5 5 -1.36 0.67 1.86 1.215 -0.911 2.63 0.021 -0.67 1.35
## 6 6 2.01 -0.67 -0.78 -0.616 0.790 -1.89 0.486 0.22 -0.34

# The same cluster means on the original (unstandardised) scale, again
# leaving out columns 8 and 9.
means2 <- aggregate(x = mtcars[, -(8:9)], by = list(clusters.6), FUN = mean)
means2
## Group.1 mpg cyl disp hp drat wt qsec gear carb
## 1 1 20 6.0 160 128 3.9 3.0 17 4.2 4.4
## 2 2 22 4.6 160 96 3.6 2.9 20 3.6 1.4
## 3 3 17 8.0 316 170 3.0 3.7 17 3.0 2.4
## 4 4 15 8.0 340 272 3.7 3.5 15 4.0 5.0
## 5 5 12 8.0 457 217 3.1 5.3 18 3.0 4.0
## 6 6 30 4.0 87 76 4.3 1.9 18 4.3 1.5
# (ix) Final cluster membership: put each car's cluster number in front of
# the full original data set as a first column named clusters.6.
mtcars.membership <- cbind(clusters.6 = clusters.6, mtcars)
mtcars.membership
## clusters.6 mpg cyl disp hp drat wt qsec vs am gear
## Mazda RX4 1 21 6 160 110 3.9 2.6 16 0 1 4
## Mazda RX4 Wag 1 21 6 160 110 3.9 2.9 17 0 1 4
## Datsun 710 2 23 4 108 93 3.9 2.3 19 1 1 4
## Hornet 4 Drive 2 21 6 258 110 3.1 3.2 19 1 0 3
## Hornet Sportabout 3 19 8 360 175 3.1 3.4 17 0 0 3
## Valiant 2 18 6 225 105 2.8 3.5 20 1 0 3
# Extension: install and attach sparcl, then redraw the dendrogram coloured
# by the 6-cluster membership and labelled with the names carried by
# clusters.6.
install.packages("sparcl")
library(sparcl)
ColorDendrogram(
  clusters,
  y = clusters.6,
  labels = names(clusters.6),
  branchlength = 5
)