# Read the Bob Ross data set from a local CSV file.
bob <- read.csv("C:/Users/justt/Desktop/School/622/Homework/HW 1/bob_ross.csv")
# Drop the first two columns; only the remaining columns become items.
# NOTE(review): presumably the dropped columns are identifiers rather than
# item indicators -- confirm against the CSV header.
bob_remove <- bob[, -(1:2)]
# Coerce the remaining columns to a "transactions" object for rule mining.
bob_convert <- as(as.matrix(bob_remove), "transactions")
# Mine association rules with minimum support 0.3 and minimum confidence 0.9.
# Parameter names are written out in full instead of relying on partial
# matching of `sup` / `conf`.
bob_bask <- apriori(
  bob_convert,
  parameter = list(support = 0.3, confidence = 0.9, target = "rules")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.9 0.1 1 none FALSE TRUE 5 0.3 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 120
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[66 item(s), 403 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [31 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
There were 31 rules created with Tree(s) on the right-hand side. Some of the landscape types that contain Tree(s) are River, Grass, Lake, Mountain, Deciduous, Conifer, and, of course, Tree(s).
# NHANES training data; every model below is fit on this data frame.
n_train <- read.csv("C:/Users/justt/Desktop/School/622/Homework/HW 1/nhanes_train.csv")

# Linear regression of Weight on all other columns.
rand_train <- lm(Weight ~ ., data = n_train)

# Decision tree (rpart) for Weight using all other columns.
dtree_train <- rpart(formula = Weight ~ ., data = n_train)

# Bagged model built from 100 bootstrap replicates.
bagg_train <- bagging(
  formula = Weight ~ .,
  data = n_train,
  nbagg = 100
)

# Random forest of 200 trees, with variable importance recorded.
rf_train <- randomForest(
  Weight ~ .,
  data = n_train,
  importance = TRUE,
  ntree = 200
)

# Gradient boosting: 200 trees, shrinkage 0.03, interaction depth 5,
# Gaussian (squared-error) loss.
boost_train <- gbm(
  formula = Weight ~ .,
  data = n_train,
  distribution = "gaussian",
  n.trees = 200,
  shrinkage = 0.03,
  interaction.depth = 5
)
# Read the NHANES test data from a local CSV file.
n_test <- read.csv("C:/Users/justt/Desktop/School/622/Homework/HW 1/nhanes_test.csv")
# Overall training-set mean of each variable, printed one at a time.
mean(n_train[["Age"]])
## [1] 45.14699
mean(n_train[["Weight"]])
## [1] 82.63375
mean(n_train[["Height"]])
## [1] 169.5465
mean(n_train[["Pulse"]])
## [1] 72.32045
# kmeans() itself comes from the stats package; the class package is still
# loaded here in case other parts of the analysis rely on it.
library(class)
# k-means starts from randomly chosen centers, so cluster numbering and
# membership can differ from run to run. Fixing the seed makes the clustering
# reproducible; any cluster numbers quoted in the write-up should be re-read
# from this seeded run.
set.seed(622)
# Partition the rows of n_train into 4 clusters using every column.
fit <- kmeans(n_train, centers = 4)
| Group             | Age      | Weight    | Height   | Pulse    |
|-------------------|----------|-----------|----------|----------|
| Mean of cluster 1 | 37.09263 | 115.77789 | 175.9537 | 77.14105 |
| Mean of cluster 2 | 63.45441 | 67.71181  | 161.9109 | 69.48580 |
| Mean of cluster 3 | 54.84072 | 92.01787  | 174.3251 | 69.24931 |
| Mean of cluster 4 | 29.80299 | 70.16010  | 168.1681 | 74.13532 |
| Mean of Variables | 45.14699 | 82.63375  | 169.5465 | 72.32045 |
# Cluster label that the k-means fit assigned to row 2000 of its input data.
fit[["cluster"]][2000]
## [1] 1
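The data for row 2000 was placed in cluster 4 when this analysis was originally run. Note that the output shown above reports cluster 1: k-means starts from randomly chosen centers, so cluster labels can change from run to run unless a seed is fixed with set.seed(), which is the likely reason for the difference.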