## 14.2 ##########
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Loading required package: grid
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust gclus
ct.df <- read.csv("Coursetopics.csv")
View(ct.df)
summary(ct.df)
## Intro DataMining Survey Cat.Data
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3945 Mean :0.1781 Mean :0.1863 Mean :0.2082
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Regression Forecast DOE SW
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2082 Mean :0.1397 Mean :0.1726 Mean :0.2219
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
ct.mat <- as.matrix(ct.df) # converting to matrix from dataframe
head(ct.mat)
## Intro DataMining Survey Cat.Data Regression Forecast DOE SW
## [1,] 1 1 0 0 0 0 0 0
## [2,] 0 0 1 0 0 0 0 0
## [3,] 0 1 0 1 1 0 0 1
## [4,] 1 0 0 0 0 0 0 0
## [5,] 1 1 0 0 0 0 0 0
## [6,] 0 1 0 0 0 0 0 0
ct.trans <- as(ct.mat, "transactions") # converting to "transaction" class
ct.trans@itemInfo
## labels
## 1 Intro
## 2 DataMining
## 3 Survey
## 4 Cat.Data
## 5 Regression
## 6 Forecast
## 7 DOE
## 8 SW
rules <- apriori(ct.trans, parameter = list(supp = 7/365, conf = 0.6, target = "rules"))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 0.01917808 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 7
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[8 item(s), 365 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
inspect(sort(rules, by= 'lift'))
## lhs rhs support confidence lift
## [1] {Intro,Regression,DOE} => {SW} 0.01917808 0.7777778 3.504801
## [2] {Intro,DOE,SW} => {Regression} 0.01917808 0.6363636 3.056220
## [3] {DataMining,Regression} => {Cat.Data} 0.02739726 0.6250000 3.001645
## [4] {Intro,DOE} => {SW} 0.03013699 0.6470588 2.915759
## [5] {Regression,DOE} => {SW} 0.01917808 0.6363636 2.867565
## [6] {Regression,DOE,SW} => {Intro} 0.01917808 1.0000000 2.534722
## [7] {Regression,DOE} => {Intro} 0.02465753 0.8181818 2.073864
## [8] {Regression,SW} => {Intro} 0.03835616 0.7000000 1.774306
## [9] {Survey,SW} => {Intro} 0.03287671 0.6666667 1.689815
## [10] {DataMining,Regression} => {Intro} 0.02739726 0.6250000 1.584201
## [11] {Cat.Data,Regression} => {Intro} 0.03287671 0.6000000 1.520833
## count
## [1] 7
## [2] 7
## [3] 10
## [4] 11
## [5] 7
## [6] 7
## [7] 9
## [8] 14
## [9] 12
## [10] 10
## [11] 12
plot(rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

itemFrequencyPlot(ct.trans, ylim=c(0.0,0.5))

## 15.2 ############
ph.df <- read.csv("Pharmaceuticals.csv")
View(ph.df)
names(ph.df)
## [1] "Symbol" "Name" "Market_Cap"
## [4] "Beta" "PE_Ratio" "ROE"
## [7] "ROA" "Asset_Turnover" "Leverage"
## [10] "Rev_Growth" "Net_Profit_Margin" "Median_Recommendation"
## [13] "Location" "Exchange"
summary(ph.df)
## Symbol Name Market_Cap Beta
## ABT : 1 Abbott Laboratories: 1 Min. : 0.41 Min. :0.1800
## AGN : 1 Allergan, Inc. : 1 1st Qu.: 6.30 1st Qu.:0.3500
## AHM : 1 Amersham plc : 1 Median : 48.19 Median :0.4600
## AVE : 1 AstraZeneca PLC : 1 Mean : 57.65 Mean :0.5257
## AZN : 1 Aventis : 1 3rd Qu.: 73.84 3rd Qu.:0.6500
## BAY : 1 Bayer AG : 1 Max. :199.47 Max. :1.1100
## (Other):15 (Other) :15
## PE_Ratio ROE ROA Asset_Turnover Leverage
## Min. : 3.60 Min. : 3.9 Min. : 1.40 Min. :0.3 Min. :0.0000
## 1st Qu.:18.90 1st Qu.:14.9 1st Qu.: 5.70 1st Qu.:0.6 1st Qu.:0.1600
## Median :21.50 Median :22.6 Median :11.20 Median :0.6 Median :0.3400
## Mean :25.46 Mean :25.8 Mean :10.51 Mean :0.7 Mean :0.5857
## 3rd Qu.:27.90 3rd Qu.:31.0 3rd Qu.:15.00 3rd Qu.:0.9 3rd Qu.:0.6000
## Max. :82.50 Max. :62.9 Max. :20.30 Max. :1.1 Max. :3.5100
##
## Rev_Growth Net_Profit_Margin Median_Recommendation Location
## Min. :-3.17 Min. : 2.6 Hold :9 CANADA : 1
## 1st Qu.: 6.38 1st Qu.:11.2 Moderate Buy :7 FRANCE : 1
## Median : 9.37 Median :16.1 Moderate Sell:4 GERMANY : 1
## Mean :13.37 Mean :15.7 Strong Buy :1 IRELAND : 1
## 3rd Qu.:21.87 3rd Qu.:21.1 SWITZERLAND: 1
## Max. :34.21 Max. :25.5 UK : 3
## US :13
## Exchange
## AMEX : 1
## NASDAQ: 1
## NYSE :19
##
##
##
##
rownames(ph.df) <- ph.df[,1]
ph2.df <- ph.df[,-c(1,2,12:14)]
ph2.df.norm <- sapply(ph2.df, scale)
summary(ph2.df.norm)
## Market_Cap Beta PE_Ratio ROE
## Min. :-0.9768 Min. :-1.3466 Min. :-1.3404 Min. :-1.4515
## 1st Qu.:-0.8763 1st Qu.:-0.6844 1st Qu.:-0.4023 1st Qu.:-0.7223
## Median :-0.1614 Median :-0.2560 Median :-0.2429 Median :-0.2118
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2762 3rd Qu.: 0.4841 3rd Qu.: 0.1495 3rd Qu.: 0.3450
## Max. : 2.4200 Max. : 2.2758 Max. : 3.4971 Max. : 2.4597
## ROA Asset_Turnover Leverage Rev_Growth
## Min. :-1.7128 Min. :-1.8451 Min. :-0.74966 Min. :-1.4971
## 1st Qu.:-0.9047 1st Qu.:-0.4613 1st Qu.:-0.54487 1st Qu.:-0.6328
## Median : 0.1289 Median :-0.4613 Median :-0.31449 Median :-0.3621
## Mean : 0.0000 Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.8430 3rd Qu.: 0.9225 3rd Qu.: 0.01828 3rd Qu.: 0.7693
## Max. : 1.8389 Max. : 1.8451 Max. : 3.74280 Max. : 1.8862
## Net_Profit_Margin
## Min. :-1.99560
## 1st Qu.:-0.68504
## Median : 0.06168
## Mean : 0.00000
## 3rd Qu.: 0.82364
## Max. : 1.49416
sdev <- sapply(as.data.frame(ph2.df.norm),sd)
rownames(ph2.df.norm) <- rownames(ph.df)
d.norm <- dist(ph2.df.norm, method= "euclidean")
## part a
hc_ward <- hclust(d.norm, method = "ward.D")
firms <- cutree(hc_ward, k = 2)
plot(hc_ward, hang = -1, ann = FALSE)

## part b
centers <- aggregate(ph2.df.norm, by = list(firms), FUN = mean)
centers
## Group.1 Market_Cap Beta PE_Ratio ROE ROA Asset_Turnover
## 1 1 0.6733825 -0.3586419 -0.2763512 0.6565978 0.8344159 0.4612656
## 2 2 -0.7407208 0.3945061 0.3039863 -0.7222576 -0.9178575 -0.5073922
## Leverage Rev_Growth Net_Profit_Margin
## 1 -0.3331068 -0.2902163 0.6823310
## 2 0.3664175 0.3192379 -0.7505641
## part c
ph.df[,15] <- firms
colnames (ph.df)[15]<- "Cluster"
ph.df[,c(11,15)]
## Net_Profit_Margin Cluster
## ABT 16.1 1
## AGN 5.5 2
## AHM 11.2 2
## AZN 18.0 1
## AVE 12.9 2
## BAY 2.6 2
## BMY 20.6 1
## CHTT 7.5 2
## ELN 13.3 2
## LLY 23.4 1
## GSK 21.1 1
## IVX 11.0 2
## JNJ 17.9 1
## MRX 21.3 2
## MRK 14.1 1
## NVS 22.4 1
## PFE 25.2 1
## PHA 7.3 2
## SGP 17.6 1
## WPI 15.1 2
## WYE 25.5 1
rcmnd <- aggregate(ph.df$Median_Recommendation,
by = list(firms, ph.df$Median_Recommendation),FUN = length)
rcmnd
## Group.1 Group.2 x
## 1 1 Hold 6
## 2 2 Hold 3
## 3 1 Moderate Buy 3
## 4 2 Moderate Buy 4
## 5 1 Moderate Sell 2
## 6 2 Moderate Sell 2
## 7 2 Strong Buy 1
location <- aggregate(ph.df$Location, by = list(firms, ph.df$Location), FUN = length)
location
## Group.1 Group.2 x
## 1 2 CANADA 1
## 2 2 FRANCE 1
## 3 2 GERMANY 1
## 4 2 IRELAND 1
## 5 1 SWITZERLAND 1
## 6 1 UK 2
## 7 2 UK 1
## 8 1 US 8
## 9 2 US 5
exchange <- aggregate(ph.df$Exchange, by = list(firms, ph.df$Exchange), FUN = length)
exchange
## Group.1 Group.2 x
## 1 2 AMEX 1
## 2 2 NASDAQ 1
## 3 1 NYSE 11
## 4 2 NYSE 8
combined <- aggregate(ph.df$Symbol, by = list(firms,ph.df[,12],ph.df[,13],ph.df[,14]),
FUN = length)
combined
## Group.1 Group.2 Group.3 Group.4 x
## 1 2 Hold US AMEX 1
## 2 2 Moderate Buy US NASDAQ 1
## 3 2 Moderate Buy CANADA NYSE 1
## 4 2 Moderate Buy FRANCE NYSE 1
## 5 2 Hold GERMANY NYSE 1
## 6 2 Moderate Sell IRELAND NYSE 1
## 7 1 Hold SWITZERLAND NYSE 1
## 8 1 Hold UK NYSE 1
## 9 1 Moderate Sell UK NYSE 1
## 10 2 Strong Buy UK NYSE 1
## 11 1 Hold US NYSE 4
## 12 2 Hold US NYSE 1
## 13 1 Moderate Buy US NYSE 3
## 14 2 Moderate Buy US NYSE 1
## 15 1 Moderate Sell US NYSE 1
## 16 2 Moderate Sell US NYSE 1