## 14.2 ##########

library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Loading required package: grid
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust gclus
ct.df <- read.csv("Coursetopics.csv")
View(ct.df)
summary(ct.df)
##      Intro          DataMining         Survey          Cat.Data     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.3945   Mean   :0.1781   Mean   :0.1863   Mean   :0.2082  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##    Regression        Forecast           DOE               SW        
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2082   Mean   :0.1397   Mean   :0.1726   Mean   :0.2219  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000
ct.mat <- as.matrix(ct.df) # converting to matrix from dataframe
head(ct.mat)
##      Intro DataMining Survey Cat.Data Regression Forecast DOE SW
## [1,]     1          1      0        0          0        0   0  0
## [2,]     0          0      1        0          0        0   0  0
## [3,]     0          1      0        1          1        0   0  1
## [4,]     1          0      0        0          0        0   0  0
## [5,]     1          1      0        0          0        0   0  0
## [6,]     0          1      0        0          0        0   0  0
ct.trans <- as(ct.mat, "transactions")  # converting to "transaction" class
ct.trans@itemInfo
##       labels
## 1      Intro
## 2 DataMining
## 3     Survey
## 4   Cat.Data
## 5 Regression
## 6   Forecast
## 7        DOE
## 8         SW
rules <- apriori(ct.trans, parameter = list(supp = 7/365, conf = 0.6, target = "rules"))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime    support minlen
##         0.6    0.1    1 none FALSE            TRUE       5 0.01917808      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 7 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[8 item(s), 365 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
inspect(sort(rules, by= 'lift'))
##      lhs                        rhs          support    confidence lift    
## [1]  {Intro,Regression,DOE}  => {SW}         0.01917808 0.7777778  3.504801
## [2]  {Intro,DOE,SW}          => {Regression} 0.01917808 0.6363636  3.056220
## [3]  {DataMining,Regression} => {Cat.Data}   0.02739726 0.6250000  3.001645
## [4]  {Intro,DOE}             => {SW}         0.03013699 0.6470588  2.915759
## [5]  {Regression,DOE}        => {SW}         0.01917808 0.6363636  2.867565
## [6]  {Regression,DOE,SW}     => {Intro}      0.01917808 1.0000000  2.534722
## [7]  {Regression,DOE}        => {Intro}      0.02465753 0.8181818  2.073864
## [8]  {Regression,SW}         => {Intro}      0.03835616 0.7000000  1.774306
## [9]  {Survey,SW}             => {Intro}      0.03287671 0.6666667  1.689815
## [10] {DataMining,Regression} => {Intro}      0.02739726 0.6250000  1.584201
## [11] {Cat.Data,Regression}   => {Intro}      0.03287671 0.6000000  1.520833
##      count
## [1]   7   
## [2]   7   
## [3]  10   
## [4]  11   
## [5]   7   
## [6]   7   
## [7]   9   
## [8]  14   
## [9]  12   
## [10] 10   
## [11] 12
plot(rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

itemFrequencyPlot(ct.trans, ylim=c(0.0,0.5))

## 15.2 ############

ph.df <- read.csv("Pharmaceuticals.csv")
View(ph.df)
names(ph.df)
##  [1] "Symbol"                "Name"                  "Market_Cap"           
##  [4] "Beta"                  "PE_Ratio"              "ROE"                  
##  [7] "ROA"                   "Asset_Turnover"        "Leverage"             
## [10] "Rev_Growth"            "Net_Profit_Margin"     "Median_Recommendation"
## [13] "Location"              "Exchange"
summary(ph.df)
##      Symbol                    Name      Market_Cap          Beta       
##  ABT    : 1   Abbott Laboratories: 1   Min.   :  0.41   Min.   :0.1800  
##  AGN    : 1   Allergan, Inc.     : 1   1st Qu.:  6.30   1st Qu.:0.3500  
##  AHM    : 1   Amersham plc       : 1   Median : 48.19   Median :0.4600  
##  AVE    : 1   AstraZeneca PLC    : 1   Mean   : 57.65   Mean   :0.5257  
##  AZN    : 1   Aventis            : 1   3rd Qu.: 73.84   3rd Qu.:0.6500  
##  BAY    : 1   Bayer AG           : 1   Max.   :199.47   Max.   :1.1100  
##  (Other):15   (Other)            :15                                    
##     PE_Ratio          ROE            ROA        Asset_Turnover    Leverage     
##  Min.   : 3.60   Min.   : 3.9   Min.   : 1.40   Min.   :0.3    Min.   :0.0000  
##  1st Qu.:18.90   1st Qu.:14.9   1st Qu.: 5.70   1st Qu.:0.6    1st Qu.:0.1600  
##  Median :21.50   Median :22.6   Median :11.20   Median :0.6    Median :0.3400  
##  Mean   :25.46   Mean   :25.8   Mean   :10.51   Mean   :0.7    Mean   :0.5857  
##  3rd Qu.:27.90   3rd Qu.:31.0   3rd Qu.:15.00   3rd Qu.:0.9    3rd Qu.:0.6000  
##  Max.   :82.50   Max.   :62.9   Max.   :20.30   Max.   :1.1    Max.   :3.5100  
##                                                                                
##    Rev_Growth    Net_Profit_Margin   Median_Recommendation        Location 
##  Min.   :-3.17   Min.   : 2.6      Hold         :9         CANADA     : 1  
##  1st Qu.: 6.38   1st Qu.:11.2      Moderate Buy :7         FRANCE     : 1  
##  Median : 9.37   Median :16.1      Moderate Sell:4         GERMANY    : 1  
##  Mean   :13.37   Mean   :15.7      Strong Buy   :1         IRELAND    : 1  
##  3rd Qu.:21.87   3rd Qu.:21.1                              SWITZERLAND: 1  
##  Max.   :34.21   Max.   :25.5                              UK         : 3  
##                                                            US         :13  
##    Exchange 
##  AMEX  : 1  
##  NASDAQ: 1  
##  NYSE  :19  
##             
##             
##             
## 
rownames(ph.df) <- ph.df[,1]
ph2.df <- ph.df[,-c(1,2,12:14)]
ph2.df.norm <- sapply(ph2.df, scale)
summary(ph2.df.norm)
##    Market_Cap           Beta            PE_Ratio            ROE         
##  Min.   :-0.9768   Min.   :-1.3466   Min.   :-1.3404   Min.   :-1.4515  
##  1st Qu.:-0.8763   1st Qu.:-0.6844   1st Qu.:-0.4023   1st Qu.:-0.7223  
##  Median :-0.1614   Median :-0.2560   Median :-0.2429   Median :-0.2118  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.2762   3rd Qu.: 0.4841   3rd Qu.: 0.1495   3rd Qu.: 0.3450  
##  Max.   : 2.4200   Max.   : 2.2758   Max.   : 3.4971   Max.   : 2.4597  
##       ROA          Asset_Turnover       Leverage          Rev_Growth     
##  Min.   :-1.7128   Min.   :-1.8451   Min.   :-0.74966   Min.   :-1.4971  
##  1st Qu.:-0.9047   1st Qu.:-0.4613   1st Qu.:-0.54487   1st Qu.:-0.6328  
##  Median : 0.1289   Median :-0.4613   Median :-0.31449   Median :-0.3621  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.8430   3rd Qu.: 0.9225   3rd Qu.: 0.01828   3rd Qu.: 0.7693  
##  Max.   : 1.8389   Max.   : 1.8451   Max.   : 3.74280   Max.   : 1.8862  
##  Net_Profit_Margin 
##  Min.   :-1.99560  
##  1st Qu.:-0.68504  
##  Median : 0.06168  
##  Mean   : 0.00000  
##  3rd Qu.: 0.82364  
##  Max.   : 1.49416
sdev <- sapply(as.data.frame(ph2.df.norm),sd)
rownames(ph2.df.norm) <- rownames(ph.df)
d.norm <- dist(ph2.df.norm, method= "euclidean")

## part a 
hc_ward <- hclust(d.norm, method = "ward.D")
firms <- cutree(hc_ward, k = 2)
plot(hc_ward, hang = -1, ann = FALSE)

## part b 
centers <- aggregate(ph2.df.norm, by = list(firms), FUN = mean)
centers
##   Group.1 Market_Cap       Beta   PE_Ratio        ROE        ROA Asset_Turnover
## 1       1  0.6733825 -0.3586419 -0.2763512  0.6565978  0.8344159      0.4612656
## 2       2 -0.7407208  0.3945061  0.3039863 -0.7222576 -0.9178575     -0.5073922
##     Leverage Rev_Growth Net_Profit_Margin
## 1 -0.3331068 -0.2902163         0.6823310
## 2  0.3664175  0.3192379        -0.7505641
## part c 
ph.df[,15] <- firms
colnames (ph.df)[15]<- "Cluster"
ph.df[,c(11,15)]
##      Net_Profit_Margin Cluster
## ABT               16.1       1
## AGN                5.5       2
## AHM               11.2       2
## AZN               18.0       1
## AVE               12.9       2
## BAY                2.6       2
## BMY               20.6       1
## CHTT               7.5       2
## ELN               13.3       2
## LLY               23.4       1
## GSK               21.1       1
## IVX               11.0       2
## JNJ               17.9       1
## MRX               21.3       2
## MRK               14.1       1
## NVS               22.4       1
## PFE               25.2       1
## PHA                7.3       2
## SGP               17.6       1
## WPI               15.1       2
## WYE               25.5       1
rcmnd <- aggregate(ph.df$Median_Recommendation, 
                   by = list(firms, ph.df$Median_Recommendation),FUN = length)
rcmnd
##   Group.1       Group.2 x
## 1       1          Hold 6
## 2       2          Hold 3
## 3       1  Moderate Buy 3
## 4       2  Moderate Buy 4
## 5       1 Moderate Sell 2
## 6       2 Moderate Sell 2
## 7       2    Strong Buy 1
location <- aggregate(ph.df$Location, by = list(firms, ph.df$Location), FUN = length)
location
##   Group.1     Group.2 x
## 1       2      CANADA 1
## 2       2      FRANCE 1
## 3       2     GERMANY 1
## 4       2     IRELAND 1
## 5       1 SWITZERLAND 1
## 6       1          UK 2
## 7       2          UK 1
## 8       1          US 8
## 9       2          US 5
exchange <- aggregate(ph.df$Exchange, by = list(firms, ph.df$Exchange), FUN = length)
exchange
##   Group.1 Group.2  x
## 1       2    AMEX  1
## 2       2  NASDAQ  1
## 3       1    NYSE 11
## 4       2    NYSE  8
combined <- aggregate(ph.df$Symbol, by = list(firms,ph.df[,12],ph.df[,13],ph.df[,14]), 
                      FUN = length)
combined
##    Group.1       Group.2     Group.3 Group.4 x
## 1        2          Hold          US    AMEX 1
## 2        2  Moderate Buy          US  NASDAQ 1
## 3        2  Moderate Buy      CANADA    NYSE 1
## 4        2  Moderate Buy      FRANCE    NYSE 1
## 5        2          Hold     GERMANY    NYSE 1
## 6        2 Moderate Sell     IRELAND    NYSE 1
## 7        1          Hold SWITZERLAND    NYSE 1
## 8        1          Hold          UK    NYSE 1
## 9        1 Moderate Sell          UK    NYSE 1
## 10       2    Strong Buy          UK    NYSE 1
## 11       1          Hold          US    NYSE 4
## 12       2          Hold          US    NYSE 1
## 13       1  Moderate Buy          US    NYSE 3
## 14       2  Moderate Buy          US    NYSE 1
## 15       1 Moderate Sell          US    NYSE 1
## 16       2 Moderate Sell          US    NYSE 1