Install any needed packages.
#install.packages("lsa")
#install.packages("mdendro")
#install.packages("dendextend")
Load all packages
# packages we've used a lot
library(ggplot2)
library(ggpubr)
# packages we've only recently started using
library(scatterplot3d)
library(pander)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
# NEW packages- make sure you installed them!
library(dendextend)
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
##
## ---------------------
## Welcome to dendextend version 1.15.2
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:permute':
##
## shuffle
## The following object is masked from 'package:ggpubr':
##
## rotate
## The following object is masked from 'package:stats':
##
## cutree
library(lsa)
## Loading required package: SnowballC
NOTE: This vectors may be useful for exploring the functionality during lecture of R functions but are not used in the analysis itself or added to the dataframe.
# data exploration vectors
MW.da <- c(89, 121, 133, 146, 165, 75, 155, 131, 146, 131,
149, 132, 115, 147, 174, 105, 119, 117, 204, 181)
polar.req <-c(7, 4.8, 13, 12.5, 5, 7.9, 8.4, 4.9, 10.1, 4.9,
5.3, 10, 6.6, 8.6,9.1,7.5,6.6,5.6,5.2,5.4)
freq <-c(7.8, 1.1, 5.19 ,6.72, 4.39, 6.77, 2.03, 6.95,
6.32, 10.15, 2.28, 4.37, 4.26, 3.45, 5.23,
6.46, 5.12, 7.01, 1.09, 3.3)
The following vectors are the columns that make up Table 2.2 of Higgs and Attwood 2005, Chapter 2.
# main data vectors
aa.1 <-c('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L','M','N',
'P','Q','R','S','T','V','W','Y')
vol <- c(67, 86, 91, 109, 135, 48, 118, 124, 135, 124, 124, 96, 90, 114,
148, 73, 93, 105, 163, 141)
bulk <-c(11.5, 13.46, 11.68, 13.57, 19.8, 3.4, 13.69, 21.4, 15.71, 21.4, 16.25,
12.28,17.43, 14.45, 14.28, 9.47, 15.77, 21.57, 21.67, 18.03)
pol <-c(0,1.48,49.7,49.9,0.35,0,51.6,0.13,49.5,0.13,
1.43,3.38,1.58,3.53,52,1.67,1.66,0.13,2.1,1.61)
isoelec <-c(6,5.07,2.77,3.22,5.48,5.97,7.59,6.02,9.74,5.98, 5.74, 5.41, 6.3,
5.65, 10.76, 5.68, 6.16, 5.96, 5.89, 5.66)
H2Opho.34 <-c(1.8,2.5,-3.5,-3.5,2.8,-0.4,-3.2,4.5, -3.9, 3.8, 1.9, -3.5, -1.6,
-3.5, -4.5, -0.8, -0.7, 4.2, -0.9, -1.3)
H2Opho.35 <-c(1.6,2,-9.2,-8.2,3.7,1,-3,3.1,-8.8,2.8,3.4,-4.8, -0.2,-4.1,-12.3,
0.6,1.2,2.6,1.9,-0.7)
saaH2O <-c(113, 140,151,183,218,85,194,182,211,180, 204, 158, 143, 189,
241, 122, 146,160,259,229)
faal.fold <-c(0.74,0.91,0.62,0.62,0.88,0.72,0.78,0.88,0.52, 0.85, 0.85, 0.63,
0.64,0.62,0.64,0.66,0.7, 0.86, 0.85, 0.76)
TODO: Create a vector of the full names and the 3-letter codes of the amino acids, then add it to the data.frame.
# build dataframe
## TODO:
## create a vector with the FULL names of all the amino acids
aa.full <- NA
## TODO:
## create a vector with the 3-letter codes for amino acids
aa.3 <- NA
## create dataframe
habk.df <- data.frame(aa.full = aa.full,
aa.3 = aa.3,
aa.1 = aa.1,
Vol = vol, Bulk = bulk, Polarity = pol,
pI = isoelec, Hyd.1 = H2Opho.34,
Hyd.2 = H2Opho.35, Surface_area = saaH2O,
Fract_area = faal.fold)
# name rows - this will help later
# when we do PCA
row.names(habk.df) <- aa.1
# display with pander
pander(habk.df)
| aa.full | aa.3 | aa.1 | Vol | Bulk | Polarity | pI | Hyd.1 | |
|---|---|---|---|---|---|---|---|---|
| A | NA | NA | A | 67 | 11.5 | 0 | 6 | 1.8 |
| C | NA | NA | C | 86 | 13.46 | 1.48 | 5.07 | 2.5 |
| D | NA | NA | D | 91 | 11.68 | 49.7 | 2.77 | -3.5 |
| E | NA | NA | E | 109 | 13.57 | 49.9 | 3.22 | -3.5 |
| F | NA | NA | F | 135 | 19.8 | 0.35 | 5.48 | 2.8 |
| G | NA | NA | G | 48 | 3.4 | 0 | 5.97 | -0.4 |
| H | NA | NA | H | 118 | 13.69 | 51.6 | 7.59 | -3.2 |
| I | NA | NA | I | 124 | 21.4 | 0.13 | 6.02 | 4.5 |
| K | NA | NA | K | 135 | 15.71 | 49.5 | 9.74 | -3.9 |
| L | NA | NA | L | 124 | 21.4 | 0.13 | 5.98 | 3.8 |
| M | NA | NA | M | 124 | 16.25 | 1.43 | 5.74 | 1.9 |
| N | NA | NA | N | 96 | 12.28 | 3.38 | 5.41 | -3.5 |
| P | NA | NA | P | 90 | 17.43 | 1.58 | 6.3 | -1.6 |
| Q | NA | NA | Q | 114 | 14.45 | 3.53 | 5.65 | -3.5 |
| R | NA | NA | R | 148 | 14.28 | 52 | 10.76 | -4.5 |
| S | NA | NA | S | 73 | 9.47 | 1.67 | 5.68 | -0.8 |
| T | NA | NA | T | 93 | 15.77 | 1.66 | 6.16 | -0.7 |
| V | NA | NA | V | 105 | 21.57 | 0.13 | 5.96 | 4.2 |
| W | NA | NA | W | 163 | 21.67 | 2.1 | 5.89 | -0.9 |
| Y | NA | NA | Y | 141 | 18.03 | 1.61 | 5.66 | -1.3 |
| Hyd.2 | Surface_area | Fract_area | |
|---|---|---|---|
| A | 1.6 | 113 | 0.74 |
| C | 2 | 140 | 0.91 |
| D | -9.2 | 151 | 0.62 |
| E | -8.2 | 183 | 0.62 |
| F | 3.7 | 218 | 0.88 |
| G | 1 | 85 | 0.72 |
| H | -3 | 194 | 0.78 |
| I | 3.1 | 182 | 0.88 |
| K | -8.8 | 211 | 0.52 |
| L | 2.8 | 180 | 0.85 |
| M | 3.4 | 204 | 0.85 |
| N | -4.8 | 158 | 0.63 |
| P | -0.2 | 143 | 0.64 |
| Q | -4.1 | 189 | 0.62 |
| R | -12.3 | 241 | 0.64 |
| S | 0.6 | 122 | 0.66 |
| T | 1.2 | 146 | 0.7 |
| V | 2.6 | 160 | 0.86 |
| W | 1.9 | 259 | 0.85 |
| Y | -0.7 | 229 | 0.76 |
A data dictionary is… (For exam, you need to be able to complete this sentence, describe why a data dictionary is needed, what makes good column names, what makes bad column names, etc.)
# (For exam you need to be able to...
# describe why a data dictionary is needed,
# what makes good column names,
# what makes bad column names, etc.)
# data dictionary columns
column.names <- c("Vol", "Bulk", "Polarity","pI", "Hyd.1", "Hyd.2",
"Surface_area","Fract_area")
name.full <- c("Volume", "Bulkiness", "Polarity",
"pH at Isoelectric point", "Hydrophobicity scale-1",
"Hydrophobicity scale-2","SA accessible to H2O","Fractional area")
vector.names <- c("vol", "bulk", "pol","isoelec", "H2Opho.34",
"H2Opho.35", "saaH2O","faal.fold")
reference <- c("Creighton 1993", "Zimmerman et al. 1968", "Zimmerman et al. 1968",
"Zimmerman et al. 1968", "Kyte & Doolittle 1982", "Engelman et al. 1986",
"Miller et al. 1987"," Rose et al. 1985")
TODO: Make a dataframe to hold the DATA DICTIONARY information
# TODO: Make a dataframe to hold DATA DICTIONARY information
# make dataframe
data.dictionary <- NA
# Display data dictionary
pander(data.dictionary)
NA
TODO: Using ggpubr, remake AND update/augment Figure 2.8 of Higgs and Attwood 2005, Chapter 2, page 26.
# Make scatterplot with appropriate ggpubr function
ggscatter(x = "pI", #TODO
y = "Vol", #TODO
data = habk.df, #TODO
color = "Polarity", #TODO
size = "Hyd.1", #TODO
label = "aa.1", #TODO
ylab = "Volume (Angstroms)", #TODO
xlab = "pI (pH at Isoelectric point)", #TODO
title = "Figure 2.8: Plot of amino acid volumue again pI, two #TODO properties\nthought to be important to protein folding",
ylim = c(0, 180), #TODO
xlim = c(1,12)) #TODO
For the exam you need to be able to understand what data centering and scaling is, how do do them in R with and without specialized functions, how to do them to an individual vector, how to do them to a column in a dataframe, how to do them using a for loop. Data centering is… Data scaling is…
# For the exam you need to be able to understand what data centering and scaling is,
# how do do them in R with and without specialized functions, how to do them to an
#individual vector, how to do them to a column in a dataframe,
# how to do them using a for loop.
# Subset data
## get info
dim(habk.df)
## [1] 20 11
ncol(habk.df)
## [1] 11
# easiest
habk.df.numeric <- habk.df[ , c(-1,-2,-3)]
# equivalent
habk.df.numeric <- habk.df[ , c(4,5,6,7,8,9,10,11)]
habk.df.numeric <- habk.df[ , c(4:11)]
Set up data:
# Select a single column to explore how to do centering / scaling
X <- habk.df.numeric$Vol
What is this calculating?
sum(X)/length(X)
## [1] 109.2
Challenge: what is this calculating?
X <- habk.df.numeric$Vol
sqrt(sum((X-mean(X))^2)/length(X))
## [1] 28.31007
Demonstrate centering:
# What is this?
X <- habk.df.numeric$Vol
u <- mean(X)
u-X
## [1] 42.2 23.2 18.2 0.2 -25.8 61.2 -8.8 -14.8 -25.8 -14.8 -14.8 13.2
## [13] 19.2 -4.8 -38.8 36.2 16.2 4.2 -53.8 -31.8
Demonstrate scaling (without centering):
# What is this?
X <- habk.df.numeric$Vol
s <- sd(X)
X/s
## [1] 2.306724 2.960870 3.133014 3.752730 4.647877 1.652579 4.062589 4.269161
## [9] 4.647877 4.269161 4.269161 3.305157 3.098585 3.924874 5.095451 2.513297
## [17] 3.201871 3.615016 5.611881 4.854450
Demonstrate scaling (with centering):
# What is this?
X <- habk.df.numeric$Vol
u <- mean(X)
s <- sd(X)
z <- (u-X)/s
Compare the distributions of these varibles
# what is this chunk doing?
par(mfrow = c(2,2), mar = c(3,1,3,1))
hist(X)
abline(v = mean(X), col = 2)
hist(X/s)
abline(v = mean(X/s), col = 3)
hist(u-X)
abline(v = 0, col = 4)
hist(z)
abline(v = 0, col = 5)
par(mfrow = c(1,1), mar = c(4,4,4,4))
Use the scale() function
# What is this?
# Why are each of these lines different?
scale(X, center = T, scale = F)
## [,1]
## [1,] -42.2
## [2,] -23.2
## [3,] -18.2
## [4,] -0.2
## [5,] 25.8
## [6,] -61.2
## [7,] 8.8
## [8,] 14.8
## [9,] 25.8
## [10,] 14.8
## [11,] 14.8
## [12,] -13.2
## [13,] -19.2
## [14,] 4.8
## [15,] 38.8
## [16,] -36.2
## [17,] -16.2
## [18,] -4.2
## [19,] 53.8
## [20,] 31.8
## attr(,"scaled:center")
## [1] 109.2
scale(X, center = F, scale = T)
## [,1]
## [1,] 0.5788805
## [2,] 0.7430407
## [3,] 0.7862407
## [4,] 0.9417609
## [5,] 1.1664011
## [6,] 0.4147204
## [7,] 1.0195209
## [8,] 1.0713610
## [9,] 1.1664011
## [10,] 1.0713610
## [11,] 1.0713610
## [12,] 0.8294408
## [13,] 0.7776007
## [14,] 0.9849609
## [15,] 1.2787212
## [16,] 0.6307206
## [17,] 0.8035207
## [18,] 0.9072008
## [19,] 1.4083213
## [20,] 1.2182411
## attr(,"scaled:scale")
## [1] 115.7406
scale(X, center = T, scale = T)
## [,1]
## [1,] -1.452891984
## [2,] -0.798746304
## [3,] -0.626602704
## [4,] -0.006885744
## [5,] 0.888260976
## [6,] -2.107037664
## [7,] 0.302972736
## [8,] 0.509545056
## [9,] 0.888260976
## [10,] 0.509545056
## [11,] 0.509545056
## [12,] -0.454459104
## [13,] -0.661031424
## [14,] 0.165257856
## [15,] 1.335834336
## [16,] -1.246319664
## [17,] -0.557745264
## [18,] -0.144600624
## [19,] 1.852265136
## [20,] 1.094833296
## attr(,"scaled:center")
## [1] 109.2
## attr(,"scaled:scale")
## [1] 29.04552
CHALLENGE: What is this calculating?
sum(habk.df.numeric$Vol)/length(habk.df.numeric$Vol)
## [1] 109.2
# make a copy of the data
habk.df.centered <- habk.df.numeric
# check original mean
mean(habk.df.numeric$Vol)
## [1] 109.2
habk.df.centered$Vol <- habk.df.centered$Vol - mean(habk.df.centered$Vol)
mean(habk.df.centered$Vol)
## [1] -2.842518e-15
# Bulk
habk.df.centered$Bulk <- habk.df.centered$Bulk - mean(habk.df.centered$Bulk)
mean(habk.df.numeric$Bulk)
## [1] 15.3405
TODO: Center another column using t
# center another column as in the previous chunk
Center ALL variable with for loop
# What does this for() loop do?
for(i in 1:ncol(habk.df.centered)){
mean.i <- mean(habk.df.centered[,i])
habk.df.centered[,i] <- mean.i-habk.df.centered[,i]
}
summary(habk.df.centered)
## Vol Bulk Polarity pI
## Min. :-53.80 Min. :-6.3295 Min. :-38.406 Min. :-4.7075
## 1st Qu.:-17.55 1st Qu.:-3.1320 1st Qu.: -1.429 1st Qu.:-0.0025
## Median : -2.30 Median : 0.2605 Median : 11.959 Median : 0.1275
## Mean : 0.00 Mean : 0.0000 Mean : 0.000 Mean : 0.0000
## 3rd Qu.: 18.45 3rd Qu.: 2.1755 3rd Qu.: 13.299 3rd Qu.: 0.4450
## Max. : 61.20 Max. :11.9405 Max. : 13.594 Max. : 3.2825
## Hyd.1 Hyd.2 Surface_area Fract_area
## Min. :-4.99 Min. :-5.070 Min. :-83.60 Min. :-0.1735
## 1st Qu.:-2.54 1st Qu.:-3.520 1st Qu.:-30.35 1st Qu.:-0.1135
## Median : 0.36 Median :-2.170 Median : -5.60 Median : 0.0065
## Mean : 0.00 Mean : 0.000 Mean : 0.00 Mean : 0.0000
## 3rd Qu.: 3.01 3rd Qu.: 2.905 3rd Qu.: 30.15 3rd Qu.: 0.0990
## Max. : 4.01 Max. :10.930 Max. : 90.40 Max. : 0.2165
TODO: rewrite the for() loop above so that the variable mean.i doesn’t have to be created and only a single line of code is executed within the for() loop.
#
Check …
# What is happening in this chunk?
sd(habk.df.centered$Vol)
## [1] 29.04552
sd(habk.df.numeric$Vol)
## [1] 29.04552
sd(habk.df.centered$Vol) == sd(habk.df.numeric$Vol)
## [1] TRUE
TODO: Write logical checks for the standard deviation (sd) of another variable
# logical check
Standardize a single column
# What is happening in this chunk?
# create new dataframe
habk.df.standard <- habk.df.centered
habk.df.standard$Vol <- habk.df.standard$Vol/sd(habk.df.standard$Vol)
sd(habk.df.standard$Vol)
## [1] 1
TODO: Standardize another column a done in the previous column.
# Standardize another column
Standardized an entire matrix with a for() loop
# What does this chunk do?
for(i in 1:ncol(habk.df.standard)){
sd.i <- sd(habk.df.standard[,i])
habk.df.standard[,i] <- habk.df.standard[,i]/sd.i
}
summary(habk.df.standard)
## Vol Bulk Polarity pI
## Min. :-1.85227 Min. :-1.36122 Min. :-1.75257 Min. :-2.664603
## 1st Qu.:-0.60422 1st Qu.:-0.67357 1st Qu.:-0.06519 1st Qu.:-0.001415
## Median :-0.07919 Median : 0.05602 Median : 0.54572 Median : 0.072169
## Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.000000
## 3rd Qu.: 0.63521 3rd Qu.: 0.46786 3rd Qu.: 0.60687 3rd Qu.: 0.251885
## Max. : 2.10704 Max. : 2.56792 Max. : 0.62033 Max. : 1.858005
## Hyd.1 Hyd.2 Surface_area Fract_area
## Min. :-1.6706 Min. :-1.0362 Min. :-1.8709 Min. :-1.50457
## 1st Qu.:-0.8504 1st Qu.:-0.7194 1st Qu.:-0.6792 1st Qu.:-0.98426
## Median : 0.1205 Median :-0.4435 Median :-0.1253 Median : 0.05637
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 1.0077 3rd Qu.: 0.5937 3rd Qu.: 0.6747 3rd Qu.: 0.85852
## Max. : 1.3425 Max. : 2.2338 Max. : 2.0230 Max. : 1.87746
TODO: Shorten code in the for() loop so that the object sd.i doesn’t need to be created
# for() loop without sd.i line
TODO: Write a for() loop that does BOTH tasks in a single loop.
# center
# scale by sd
Write a for() loop that….
# What does this chunk do?
habk.df.standard <- habk.df.numeric
for(i in 1:ncol(habk.df.standard)){
habk.df.standard[,i] <- scale(habk.df.standard[,i])
}
summary(habk.df.standard)
## Vol.V1 Bulk.V1 Polarity.V1
## Min. :-2.1070377 Min. :-2.5679182 Min. :-0.6203318
## 1st Qu.:-0.6352099 1st Qu.:-0.4678620 1st Qu.:-0.6068701
## Median : 0.0791861 Median :-0.0560230 Median :-0.5457222
## Mean : 0.0000000 Mean : 0.0000000 Mean : 0.0000000
## 3rd Qu.: 0.6042240 3rd Qu.: 0.6735664 3rd Qu.: 0.0651864
## Max. : 1.8522651 Max. : 1.3612193 Max. : 1.7525720
## pI.V1 Hyd.1.V1 Hyd.2.V1
## Min. :-1.8580053 Min. :-1.3424968 Min. :-2.2338170
## 1st Qu.:-0.2518850 1st Qu.:-1.0077096 1st Qu.:-0.5937089
## Median :-0.0721693 Median :-0.1205234 Median : 0.4434934
## Mean : 0.0000000 Mean : 0.0000000 Mean : 0.0000000
## 3rd Qu.: 0.0014151 3rd Qu.: 0.8503596 3rd Qu.: 0.7193994
## Max. : 2.6646032 Max. : 1.6705883 Max. : 1.0361804
## Surface_area.V1 Fract_area.V1
## Min. :-2.0230352 Min. :-1.8774603
## 1st Qu.:-0.6747180 1st Qu.:-0.8585153
## Median : 0.1253208 Median :-0.0563672
## Mean : 0.0000000 Mean : 0.0000000
## 3rd Qu.: 0.6791938 3rd Qu.: 0.9842575
## Max. : 1.8708600 Max. : 1.5045698
CHALLENGE (will NOT be on exam). How do these lines of code work:
# CHALLENGE: What does this do?
X.bar <- apply(habk.df.numeric, 2, mean)
summary(habk.df.numeric)
## Vol Bulk Polarity pI
## Min. : 48.00 Min. : 3.40 Min. : 0.000 Min. : 2.770
## 1st Qu.: 90.75 1st Qu.:13.16 1st Qu.: 0.295 1st Qu.: 5.607
## Median :111.50 Median :15.08 Median : 1.635 Median : 5.925
## Mean :109.20 Mean :15.34 Mean :13.594 Mean : 6.053
## 3rd Qu.:126.75 3rd Qu.:18.47 3rd Qu.:15.023 3rd Qu.: 6.055
## Max. :163.00 Max. :21.67 Max. :52.000 Max. :10.760
## Hyd.1 Hyd.2 Surface_area Fract_area
## Min. :-4.50 Min. :-12.300 Min. : 85.0 Min. :0.5200
## 1st Qu.:-3.50 1st Qu.: -4.275 1st Qu.:145.2 1st Qu.:0.6375
## Median :-0.85 Median : 0.800 Median :181.0 Median :0.7300
## Mean :-0.49 Mean : -1.370 Mean :175.4 Mean :0.7365
## 3rd Qu.: 2.05 3rd Qu.: 2.150 3rd Qu.:205.8 3rd Qu.:0.8500
## Max. : 4.50 Max. : 3.700 Max. :259.0 Max. :0.9100
habk.df.standard <- apply(habk.df.numeric, 2, scale)
habk.df.standard <- data.frame(habk.df.standard)
summary(habk.df.standard)
## Vol Bulk Polarity pI
## Min. :-2.10704 Min. :-2.56792 Min. :-0.62033 Min. :-1.858005
## 1st Qu.:-0.63521 1st Qu.:-0.46786 1st Qu.:-0.60687 1st Qu.:-0.251885
## Median : 0.07919 Median :-0.05602 Median :-0.54572 Median :-0.072169
## Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.000000
## 3rd Qu.: 0.60422 3rd Qu.: 0.67357 3rd Qu.: 0.06519 3rd Qu.: 0.001415
## Max. : 1.85227 Max. : 1.36122 Max. : 1.75257 Max. : 2.664603
## Hyd.1 Hyd.2 Surface_area Fract_area
## Min. :-1.3425 Min. :-2.2338 Min. :-2.0230 Min. :-1.87746
## 1st Qu.:-1.0077 1st Qu.:-0.5937 1st Qu.:-0.6747 1st Qu.:-0.85852
## Median :-0.1205 Median : 0.4435 Median : 0.1253 Median :-0.05637
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.8504 3rd Qu.: 0.7194 3rd Qu.: 0.6792 3rd Qu.: 0.98426
## Max. : 1.6706 Max. : 1.0362 Max. : 1.8709 Max. : 1.50457
Prepare data:
scatterplot3d does NOT have a data = argument. Make seperate vectors to work with to make code cleanr. ….
# raw vectors
Xraw <- habk.df.numeric$Bulk
Yraw <- habk.df.numeric$Polarity
Zraw <- habk.df.numeric$Surface_area
# center
Xcent <- Xraw - mean(Xraw)
Ycent <- Yraw - mean(Yraw)
Zcent <- Zraw - mean(Zraw)
# scale, without centering
Xscale0 <- Xraw/sd(Xraw)
Yscale0 <- Yraw/sd(Yraw)
Zscale0 <- Zraw/sd(Zraw)
# scale AND center
Xscale <- (Xraw - mean(Xraw))/sd(Xraw)
Yscale <- (Yraw - mean(Yraw))/sd(Yraw)
Zscale <- (Zraw - mean(Zraw))/sd(Zraw)
TODO: create ALL of the scaled vectors created above using the scale() function.
# vectors using R function()
3-D scatter plot of….
# what does this chunk do?
# main plot with scatterplot3d() - know how this works
plot1 <- scatterplot3d(x = Xraw,
y = Yraw,
z = Zraw,
color = 2,
pch = 16, type = "h",
xlim = c(-15,25),
ylim = c(-30,50),
zlim = c(-100,275),
main = "Raw data")
# add-ons don't need to know how these work
# These are lines at x = 0, y = 0, z = 0
p2 <- plot1$xyz.convert(0,-40,0)
p3 <- plot1$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot1$xyz.convert(-20,0,0)
p3 <- plot1$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
3-D scatter plot of….
# what does this chunk do?
# main plot - know how this works
plot2 <- scatterplot3d(x = Xcent, y = Ycent, z = Zcent,
color = 2,
pch = 16, type = "h",
xlim = c(-15,25),
ylim = c(-30,50),
zlim = c(-100,275), main = "Centered")
# add-ons don't need to know how these work
# These are lines at x = 0, y = 0, z = 0
p2 <- plot2$xyz.convert(0,-40,0)
p3 <- plot2$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot2$xyz.convert(-20,0,0)
p3 <- plot2$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
3-D scatter plot of….
# what does this chunk do?
# main plot - know how this works
plot3 <- scatterplot3d(x = Xscale0, y = Yscale0, z = Zscale0,
color = 3,
pch = 12, type = "h",
xlim = c(-15,25),
ylim = c(-30,50),
zlim = c(-100,275),
main = "Scaled (NOT centered)")
p2 <- plot3$xyz.convert(0,-40,0)
p3 <- plot3$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot3$xyz.convert(-20,0,0)
p3 <- plot3$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
3-D scatter plot of….
# main plot - know how this works
plot4 <- scatterplot3d(x = Xscale, y = Yscale, z = Zscale,
color = 3,
pch = 16, type = "h",
xlim = c(-25,25),
ylim = c(-50,50),
zlim = c(-100,275),
main = "Centered & Scaled")
p2 <- plot4$xyz.convert(0,-40,0)
p3 <- plot4$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot4$xyz.convert(-20,0,0)
p3 <- plot4$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
3-D scatter plot of….
# main plot - know how this works
plot5 <- scatterplot3d(x = Xscale, y = Yscale, z = Zscale,
color = 3,
pch = 16, type = "h",
#xlim = c(-25,25),
#ylim = c(-50,50),
#zlim = c(-100,275),
main = "Centered & Scaled")
p2 <- plot5$xyz.convert(0,-40,0)
p3 <- plot5$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot5$xyz.convert(-20,0,0)
p3 <- plot5$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
# what does this chunk do?
# which of the transformations is NOT being shown
par(mfrow = c(2,2), mar = c(1,1,1,1))
plot1 <- scatterplot3d(x = Xraw,
y = Yraw,
z = Zraw,
color = 2,
pch = 16, type = "h",
xlim = c(-15,25),
ylim = c(-30,50),
zlim = c(-100,275),
main = "")
# add-ons don't need to know how these work
# These are lines at x = 0, y = 0, z = 0
p2 <- plot1$xyz.convert(0,-40,0)
p3 <- plot1$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot1$xyz.convert(-20,0,0)
p3 <- plot1$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
# main plot - know how this works
plot2 <- scatterplot3d(x = Xcent, y = Ycent, z = Zcent,
color = 2,
pch = 16, type = "h",
xlim = c(-15,25),
ylim = c(-30,50),
zlim = c(-100,275), main = "")
# add-ons don't need to know how these work
# These are lines at x = 0, y = 0, z = 0
p2 <- plot2$xyz.convert(0,-40,0)
p3 <- plot2$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot2$xyz.convert(-20,0,0)
p3 <- plot2$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
plot4 <- scatterplot3d(x = Xscale, y = Yscale, z = Zscale,
color = 3,
pch = 16, type = "h",
xlim = c(-25,25),
ylim = c(-50,50),
zlim = c(-100,275),
main = "")
p2 <- plot4$xyz.convert(0,-40,0)
p3 <- plot4$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot4$xyz.convert(-20,0,0)
p3 <- plot4$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
# main plot - know how this works
plot5 <- scatterplot3d(x = Xscale, y = Yscale, z = Zscale,
color = 3,
pch = 16, type = "h",
#xlim = c(-25,25),
#ylim = c(-50,50),
#zlim = c(-100,275),
main = "")
p2 <- plot5$xyz.convert(0,-40,0)
p3 <- plot5$xyz.convert(0,60,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)
p2 <- plot5$xyz.convert(-20,0,0)
p3 <- plot5$xyz.convert(30,0,0)
segments(p2$x,p2$y,p3$x,p3$y,lwd=2,col=2)