email: bryan256.@msn.com
blog: sql-fy.com
Twitter: @BryanCafferky
LinkedIn: linkedin.com/in/bryancafferky
Experience
# Show the structure of the iris data frame (column names, types and the
# first few values); vec.len limits how many leading values are printed.
str(iris, vec.len=1)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 ...
$ Sepal.Width : num 3.5 3 ...
$ Petal.Length: num 1.4 1.4 ...
$ Petal.Width : num 0.2 0.2 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 ...
# Per-column summary: quartiles and mean for the numeric columns, level
# counts for the Species factor.
summary(iris)
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|
| Min. :4.300 | Min. :2.000 | Min. :1.000 | Min. :0.100 | setosa :50 |
| 1st Qu.:5.100 | 1st Qu.:2.800 | 1st Qu.:1.600 | 1st Qu.:0.300 | versicolor:50 |
| Median :5.800 | Median :3.000 | Median :4.350 | Median :1.300 | virginica :50 |
| Mean :5.843 | Mean :3.057 | Mean :3.758 | Mean :1.199 | NA |
| 3rd Qu.:6.400 | 3rd Qu.:3.300 | 3rd Qu.:5.100 | 3rd Qu.:1.800 | NA |
| Max. :7.900 | Max. :4.400 | Max. :6.900 | Max. :2.500 | NA |
# Print the first rows of iris (six by default).
head(iris)
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|
| 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| 5.4 | 3.9 | 1.7 | 0.4 | setosa |
# Print the last rows of iris (six by default), with their row numbers.
tail(iris)
| | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|---|
| 145 | 6.7 | 3.3 | 5.7 | 2.5 | virginica |
| 146 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
| 147 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
| 148 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
| 149 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
| 150 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
# Index plot of petal width: one filled point (pch = 16) per observation,
# in row order.
plot(iris$Petal.Width, pch = 16)
# Histogram of petal width. The axis-label argument of hist() is `xlab`;
# `x.lab` is not a recognised parameter and only produces warnings.
hist(iris$Petal.Width, xlab = "petal width", col = "lightblue")
# Side-by-side box plots, one per column of iris, each with its own colour.
# NOTE(review): the fifth box is the Species factor (drawn from its integer
# codes), so the "Centimeters" label only describes the first four boxes.
boxplot(
  iris,
  col = c("blue", "red", "sienna", "palevioletred1", "blue"),
  ylab = "Centimeters"
)
# Keep the four numeric measurement columns and prefix their names with
# "iris." so the correlation matrix is labelled iris.Sepal.Length, etc.
myiris <- iris[1:4]
names(myiris) <- paste0("iris.", names(myiris))
d <- myiris
# Pairwise Pearson correlations between the four measurements; cor()
# already returns a matrix.
dcor <- cor(d)
dcor
| | iris.Sepal.Length | iris.Sepal.Width | iris.Petal.Length | iris.Petal.Width |
|---|---|---|---|---|
| iris.Sepal.Length | 1.0000000 | -0.1175698 | 0.8717538 | 0.8179411 |
| iris.Sepal.Width | -0.1175698 | 1.0000000 | -0.4284401 | -0.3661259 |
| iris.Petal.Length | 0.8717538 | -0.4284401 | 1.0000000 | 0.9628654 |
| iris.Petal.Width | 0.8179411 | -0.3661259 | 0.9628654 | 1.0000000 |
# Pairwise panel plot of every iris column against every other, using
# pairs.panels() from the psych package.
library(psych)
pairs.panels(iris)
# Separate my training and scoring data sets...
# Fix the seed so the random split is reproducible, then tag every row as
# 1 (training, probability 0.67) or 2 (test, probability 0.33).
set.seed(1234)
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.67, 0.33))
# Predictors: everything except the fifth (Species) column.
iris.training <- iris[ind == 1, -5]
iris.test <- iris[ind == 2, -5]
# What we want to predict: the Species label of each row.
iris.trainLabels <- iris$Species[ind == 1]
iris.testLabels <- iris$Species[ind == 2]
# k-nearest-neighbour classification (class package): each test row is
# labelled from its k = 3 nearest training rows.
library(class)
iris_pred <- knn(
  train = iris.training,
  test = iris.test,
  cl = iris.trainLabels,
  k = 3
)
# Print the predicted species for the test rows.
iris_pred
[1] setosa setosa setosa setosa setosa setosa
[7] setosa setosa setosa setosa setosa setosa
[13] versicolor versicolor versicolor versicolor versicolor versicolor
[19] versicolor versicolor versicolor versicolor versicolor versicolor
[25] virginica virginica virginica virginica versicolor virginica
[31] virginica virginica virginica virginica virginica virginica
[37] virginica virginica virginica virginica
Levels: setosa versicolor virginica
# Score the model...
# Cross-tabulate the actual test labels (rows) against the kNN predictions
# (columns), without the per-cell chi-square contributions.
library(gmodels)
CrossTable(
  x = iris.testLabels,
  y = iris_pred,
  prop.chisq = FALSE
)
# From http://davetang.org/muse/2013/03/12/building-a-classification-tree-in-r/
library(tree)
# NOTE(review): the tree is fitted on the full iris data set, which includes
# the rows held out in iris.test, so the predictions below are made on data
# the model has already seen. Consider fitting on the training rows only.
tree1 <- tree(
  Species ~ Sepal.Width + Sepal.Length + Petal.Length + Petal.Width,
  data = iris
)
# Draw the fitted tree, then add its text labels.
plot(tree1)
text(tree1)
# Use the model to get a predicted class for each test row...
predict.list <- predict(tree1, iris.test, type = "class")
predict.list
[1] setosa setosa setosa setosa setosa setosa
[7] setosa setosa setosa setosa setosa setosa
[13] versicolor versicolor versicolor versicolor versicolor versicolor
[19] versicolor versicolor versicolor versicolor versicolor versicolor
[25] virginica virginica virginica virginica virginica virginica
[31] virginica virginica virginica virginica virginica virginica
[37] virginica virginica virginica virginica
Levels: setosa versicolor virginica
# Score the model...
# Cross-tabulate the actual test labels (rows) against the tree predictions
# (columns), without the per-cell chi-square contributions.
library(gmodels)
CrossTable(
  x = iris.testLabels,
  y = predict.list,
  prop.chisq = FALSE
)