# Set JAVA_HOME before rJava and xlsx are loaded below.
# NOTE(review): the path is hard-coded to one Windows workstation's JRE;
# adjust or remove it when running on another machine.
Sys.setenv(JAVA_HOME="C:\\Program Files\\Java\\jdk1.8.0_45\\jre")
#Sys.setenv(JAVA_HOME="C:\\Users\\jamey\\Documents\\R\\win-library\\3.1\\rJava\\libs\\x64\\rJava.dll")

library(rJava)
## Warning: package 'rJava' was built under R version 3.1.3
library(xlsx)
## Warning: package 'xlsx' was built under R version 3.1.3
## Loading required package: xlsxjars
## Warning: package 'xlsxjars' was built under R version 3.1.3
library(rvest)
## Warning: package 'rvest' was built under R version 3.1.3
library(plyr)
## Warning: package 'plyr' was built under R version 3.1.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.3
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages("cluster")
library(cluster)
#install.packages("NbClust")
library(NbClust)
## Warning: package 'NbClust' was built under R version 3.1.3
#install.packages("flexclust")
library(flexclust)
## Warning: package 'flexclust' was built under R version 3.1.3
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Warning: package 'modeltools' was built under R version 3.1.3
## Loading required package: stats4
## 
## Attaching package: 'modeltools'
## 
## The following object is masked from 'package:plyr':
## 
##     empty
## 
## The following object is masked from 'package:rJava':
## 
##     clone
#install.packages("fMultivar")
library(fMultivar)
## Warning: package 'fMultivar' was built under R version 3.1.3
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.1.3
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.1.3
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.1.3
## 
## 
## Rmetrics Package fBasics
## Analysing Markets and calculating Basic Statistics
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
## 
## Attaching package: 'fBasics'
## 
## The following object is masked from 'package:flexclust':
## 
##     getModel
## 
## The following object is masked from 'package:modeltools':
## 
##     getModel
## 
## 
## 
## Rmetrics Package fMultivar
## Analysing and Modeling Multivariate Financial Return Distributions
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
#install.packages("rattle")
library(rattle)
## Warning: package 'rattle' was built under R version 3.1.3
## Rattle: A free graphical interface for data mining with R.
## Version 3.4.1 Copyright (c) 2006-2014 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
#url.nhe <- "http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/NationalHealthExpendData/Downloads/NHE2013.zip" 

#td <- tempdir()
#tf <- tempfile(tmpdir=td, fileext=".zip")
#download.file(url.nhe, tf)


# Read the selected worksheet rows of the NHE2013 workbook into a data frame.
# Each listed spreadsheet row becomes one data-frame row (no header row);
# the script transposes the result afterwards.
# `startRow` is not passed: read.xlsx() only uses it when `rowIndex` is NULL,
# so the explicit row list below already decides which rows are read.
nhe.data <- read.xlsx(
  "~/IS 607/FINAL PROJECT/NHE2013.xls",
  sheetIndex = 1,
  rowIndex = c(2, 4, 6, 38, 32, 35, 103, 133, 223, 253, 283, 313, 343, 373, 493),
  colIndex = NULL,
  header = FALSE,
  colClasses = "integer",
  as.data.frame = TRUE,
  stringsAsFactors = FALSE
)

#unlink the file handle
#unlink(tf)

# Swap rows and columns so that each spreadsheet row read above becomes a
# column. t() turns the data frame into a matrix.
nhe.data.final <- t(nhe.data)

# View() opens a data viewer, so only call it when someone is at the console;
# in a non-interactive run (Rscript, knitting) it is skipped.
if (interactive()) {
  View(nhe.data.final)
}
str(nhe.data.final)
##  num [1:55, 1:15] NA 1960 1961 1962 1963 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:55] "X1" "X2" "X3" "X4" ...
##   ..$ : NULL
# Drop the first transposed row, which carries no yearly data (its Year entry
# is NA), so that every remaining row is one year.
nhe.data.final <- nhe.data.final[-1,]

# Label both dimensions in one step: rows are keyed by the value in the first
# (Year) column, columns get descriptive expenditure-series names.
dimnames(nhe.data.final) <- list(
  nhe.data.final[,1],
  c("Year",
    "Out of Pocket",
    "Private Health Insurance",
    "Public Health activity",
    "Investment",
    "Total CMS programs",
    "Total Hospital Espenditure",
    "Total Physician and Clinical Expenditure",
    "Total Home Health care Expenditure",
    "Other Non-Durable medical Products Expenditure",
    "total prescription drug Expenditure",
    "total durable medical equipment expenditure",
    "Total Nursing care facilities and Continuing Care",
    "Total Other health,Residential,and Personal care expenditure",
    "Net cost of Health Insurance expenditure")
)



#Convert char to number column 2:16
#nhe.data.final<- colClasses("Integer")

# Preview the first rows of the labelled matrix to check the column names
head(nhe.data.final)
##      Year Out of Pocket Private Health Insurance Public Health activity
## 1960 1960         13051                     5779                    371
## 1961 1961         13422                     6419                    409
## 1962 1962         14299                     7113                    456
## 1963 1963         15361                     7887                    509
## 1964 1964         16981                     8982                    572
## 1965 1965         18262                    10000                    621
##      Investment Total CMS programs Total Hospital Espenditure
## 1960       2566                  0                       8985
## 1961       2863                  0                       9777
## 1962       3493                  0                      10432
## 1963       3802                  0                      11507
## 1964       4408                  0                      12501
## 1965       4765                  0                      13545
##      Total Physician and Clinical Expenditure
## 1960                                     5630
## 1961                                     5842
## 1962                                     6256
## 1963                                     7075
## 1964                                     8109
## 1965                                     8587
##      Total Home Health care Expenditure
## 1960                                 57
## 1961                                 61
## 1962                                 65
## 1963                                 69
## 1964                                 75
## 1965                                 89
##      Other Non-Durable medical Products Expenditure
## 1960                                           1626
## 1961                                           1764
## 1962                                           1901
## 1963                                           1944
## 1964                                           2083
## 1965                                           2211
##      total prescription drug Expenditure
## 1960                                2676
## 1961                                2718
## 1962                                3029
## 1963                                3159
## 1964                                3347
## 1965                                3715
##      total durable medical equipment expenditure
## 1960                                         740
## 1961                                         765
## 1962                                         910
## 1963                                         901
## 1964                                        1004
## 1965                                        1105
##      Total Nursing care facilities and Continuing Care
## 1960                                               811
## 1961                                               841
## 1962                                               876
## 1963                                              1009
## 1964                                              1168
## 1965                                              1408
##      Total Other health,Residential,and Personal care expenditure
## 1960                                                          451
## 1961                                                          518
## 1962                                                          552
## 1963                                                          604
## 1964                                                          665
## 1965                                                          725
##      Net cost of Health Insurance expenditure
## 1960                                     1013
## 1961                                     1093
## 1962                                     1205
## 1963                                     1243
## 1964                                     1367
## 1965                                     1598
# Confirm the structure after labelling: dimensions and both sets of dimnames
str(nhe.data.final)
##  num [1:54, 1:15] 1960 1961 1962 1963 1964 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:54] "1960" "1961" "1962" "1963" ...
##   ..$ : chr [1:15] "Year" "Out of Pocket" "Private Health Insurance" "Public Health activity" ...
# Per-column summary statistics (min, quartiles, mean, max) across all years
summary(nhe.data.final)
##       Year      Out of Pocket    Private Health Insurance
##  Min.   :1960   Min.   : 13051   Min.   :  5779          
##  1st Qu.:1973   1st Qu.: 32404   1st Qu.: 23656          
##  Median :1986   Median :107098   Median :142618          
##  Mean   :1986   Mean   :127051   Mean   :281916          
##  3rd Qu.:2000   3rd Qu.:198633   3rd Qu.:449874          
##  Max.   :2013   Max.   :339422   Max.   :961741          
##  Public Health activity   Investment     Total CMS programs
##  Min.   :  371          Min.   :  2566   Min.   :      0   
##  1st Qu.: 2238          1st Qu.: 10491   1st Qu.:  21240   
##  Median :12980          Median : 34249   Median : 127816   
##  Mean   :24577          Mean   : 55231   Mean   : 275126   
##  3rd Qu.:42468          3rd Qu.: 87416   3rd Qu.: 420850   
##  Max.   :75480          Max.   :164594   Max.   :1048584   
##  Total Hospital Espenditure Total Physician and Clinical Expenditure
##  Min.   :  8985             Min.   :  5630                          
##  1st Qu.: 39475             1st Qu.: 20223                          
##  Median :182587             Median :106811                          
##  Mean   :277103             Mean   :178203                          
##  3rd Qu.:410054             3rd Qu.:286132                          
##  Max.   :936867             Max.   :586675                          
##  Total Home Health care Expenditure
##  Min.   :   57.0                   
##  1st Qu.:  312.8                   
##  Median : 6524.0                   
##  Mean   :20566.3                   
##  3rd Qu.:35471.5                   
##  Max.   :79772.0                   
##  Other Non-Durable medical Products Expenditure
##  Min.   : 1626                                 
##  1st Qu.: 4110                                 
##  Median :17664                                 
##  Mean   :20353                                 
##  3rd Qu.:31332                                 
##  Max.   :55892                                 
##  total prescription drug Expenditure
##  Min.   :  2676                     
##  1st Qu.:  6968                     
##  Median : 25589                     
##  Mean   : 73531                     
##  3rd Qu.:117070                     
##  Max.   :271096                     
##  total durable medical equipment expenditure
##  Min.   :  740                              
##  1st Qu.: 2312                              
##  Median : 8766                              
##  Mean   :13673                              
##  3rd Qu.:24597                              
##  Max.   :42987                              
##  Total Nursing care facilities and Continuing Care
##  Min.   :   811                                   
##  1st Qu.:  6212                                   
##  Median : 29722                                   
##  Mean   : 49750                                   
##  3rd Qu.: 84038                                   
##  Max.   :155829                                   
##  Total Other health,Residential,and Personal care expenditure
##  Min.   :   451                                              
##  1st Qu.:  2116                                              
##  Median : 16638                                              
##  Mean   : 38351                                              
##  3rd Qu.: 63327                                              
##  Max.   :148230                                              
##  Net cost of Health Insurance expenditure
##  Min.   :  1013                          
##  1st Qu.:  3396                          
##  Median : 20142                          
##  Mean   : 45335                          
##  3rd Qu.: 62149                          
##  Max.   :173615

Agglomerative hierarchical clustering: each year starts as its own cluster. Clusters are then combined two at a time until all clusters are merged into a single cluster.

  1. What are the similarities and differences in healthcare expenditures from 1960 through 2013, based on 14 expense categories?

  2. Can we group certain years into subgroups in which the expenditures are meaningfully clustered?

# nhe.data.final is built earlier in this script; it is not a dataset shipped
# with a package, so it is used directly (a data() call would only warn that
# the data set is not found). Its row names are year labels, so no case
# normalisation is needed either.

# The variables differ widely in range. scale() standardises each column to a
# mean of zero and a standard deviation of one, i.e. (x - mean(x)) / sd(x).
# Column 1 (Year) is left out so that only expenditures drive the distances.
nhe.scaled <- scale(nhe.data.final[,-1])

# Pairwise distances between years, then two agglomerative fits that differ
# only in how the distance between two clusters is defined.
d <- dist(nhe.scaled)
fit.average <- hclust(d, method="average")
fit.single <- hclust(d, method="single")

# With average linkage, the height at which two clusters merge is the average
# distance between each point in one cluster and each point in the other.
plot(fit.average, hang=-1, cex.axis=2, main="Average Linkage Clustering")

# With single linkage, the merge height is the smallest such distance instead.
plot(fit.single, hang=-1, cex.axis=2, main="Single Linkage Clustering")

Conclusion

1. All expenditure categories increase over the years (the similarity), but they increase at different rates (the difference).

2. From the plot, the years fall into three clusters: 1960 - 1988, 1989 - 2004, and 2005 - 2013.

# Selecting the number of clusters: NbClust evaluates its indices for every
# cluster count from 2 to 15, using average-linkage clustering on Euclidean
# distances of the standardised data.

# Prompt before each new plot page so the diagnostic plots can be inspected
devAskNewPage(ask = TRUE)
nc <- NbClust(
  data = nhe.scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 15,
  method = "average"
)
## [1] "Frey index : No clustering structure in this data set"

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 4 proposed 2 as the best number of clusters 
## * 9 proposed 3 as the best number of clusters 
## * 1 proposed 4 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 3 proposed 10 as the best number of clusters 
## * 1 proposed 14 as the best number of clusters 
## * 3 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************
# Tally how many indices proposed each number of clusters
table(nc$Best.n[1,])
## 
##  0  2  3  4  9 10 14 15 
##  2  4  9  1  1  3  1  3
# Obtaining the final cluster solution: cut the average-linkage tree into
# three groups and count how many years fall into each group.

clusters <- cutree(fit.average, k=3) # most voted cluster number.
table(clusters)
## clusters
##  1  2  3 
## 29 16  9
# Describe clusters: median of every expenditure column (Year excluded) within
# each hierarchical cluster, in the original units.
aggregate(nhe.data.final[,-1], by=list(cluster=clusters), median)
##   cluster Out of Pocket Private Health Insurance Public Health activity
## 1       1         34467                    25972                   2584
## 2       2        157739                   354414                  33600
## 3       3        300914                   833076                  73480
##   Investment Total CMS programs Total Hospital Espenditure
## 1    11233.0            24501.0                    44139.0
## 2    69985.5           361072.5                   357101.5
## 3   149678.0           885775.0                   776831.0
##   Total Physician and Clinical Expenditure
## 1                                    22215
## 2                                   237075
## 3                                   503176
##   Total Home Health care Expenditure
## 1                                423
## 2                              32648
## 3                              67249
##   Other Non-Durable medical Products Expenditure
## 1                                         4491.0
## 2                                        26792.5
## 3                                        50328.0
##   total prescription drug Expenditure
## 1                                7422
## 2                               72879
## 3                              254981
##   total durable medical equipment expenditure
## 1                                        2506
## 2                                       18318
## 3                                       35022
##   Total Nursing care facilities and Continuing Care
## 1                                              6928
## 2                                             71980
## 3                                            138546
##   Total Other health,Residential,and Personal care expenditure
## 1                                                         2409
## 2                                                        48464
## 3                                                       122474
##   Net cost of Health Insurance expenditure
## 1                                   3418.0
## 2                                  48947.5
## 3                                 142607.0
# Same per-cluster medians, but on the standardised values, so the columns can
# be compared with each other on a common scale.
aggregate(as.data.frame(nhe.scaled), by=list(cluster=clusters),median)
##   cluster Out of Pocket Private Health Insurance Public Health activity
## 1       1    -0.8976855               -0.8413059             -0.8530716
## 2       2     0.2975457                0.2383065              0.3499678
## 3       3     1.6857541                1.8117029              1.8968215
##   Investment Total CMS programs Total Hospital Espenditure
## 1 -0.8421071         -0.7934596                 -0.8531743
## 2  0.2823972          0.2721001                  0.2929778
## 3  1.8076864          1.9332682                  1.8301391
##   Total Physician and Clinical Expenditure
## 1                               -0.8587537
## 2                                0.3241024
## 3                                1.7890526
##   Total Home Health care Expenditure
## 1                         -0.8163469
## 2                          0.4896325
## 3                          1.8919038
##   Other Non-Durable medical Products Expenditure
## 1                                     -0.9371832
## 2                                      0.3804977
## 3                                      1.7710893
##   total prescription drug Expenditure
## 1                        -0.728836296
## 2                        -0.007184509
## 3                         2.000457663
##   total durable medical equipment expenditure
## 1                                  -0.8482508
## 2                                   0.3528186
## 3                                   1.6216437
##   Total Nursing care facilities and Continuing Care
## 1                                        -0.8633809
## 2                                         0.4482117
## 3                                         1.7903298
##   Total Other health,Residential,and Personal care expenditure
## 1                                                   -0.8029462
## 2                                                    0.2259361
## 3                                                    1.8793413
##   Net cost of Health Insurance expenditure
## 1                              -0.78002155
## 2                               0.06723409
## 3                               1.81013791
# Plot result: redraw the average-linkage dendrogram and outline the three
# clusters obtained by cutting the tree at k = 3.
plot(fit.average, hang=-1, cex=.8, main="Average Linkage Clustering\n3 Cluster Solution")
rect.hclust(fit.average, k=3)

Partitioning around medoids (PAM): once k, the number of clusters sought, has been specified, observations are randomly divided into k groups and then reshuffled to form cohesive clusters.

  1. Are there subtypes in the healthcare expenditures grouped by year?
  2. If so, how many subtypes are there, and what are their characteristics?
# k-means clustering

# Plot the within-groups sum of squares against the number of clusters for
# k-means solutions with 1 to `nc` clusters; a bend in the curve suggests a
# suitable number of clusters.
#
# Arguments:
#   data  numeric matrix or data frame, one observation per row.
#   nc    largest number of clusters to try (at least 1; default 15).
#   seed  seed set before every kmeans() run so the curve is reproducible.
#
# Returns, invisibly, the numeric vector of within-groups sums of squares
# (element i belongs to the i-cluster solution).
wssplot <- function(data, nc=15, seed=1234){
  stopifnot(nc >= 1)
  wss <- numeric(nc)
  # One cluster: the within-groups SS is the total SS about the column means.
  # var() divides by n - 1, so the multiplier must be n - 1 as well.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))
  # seq_len(nc)[-1] is empty when nc is 1, whereas 2:nc would count down
  for (i in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[i] <- sum(kmeans(data, centers=i)$withinss)
  }
  plot(seq_len(nc), wss, type="b", xlab="Number of Clusters",
       ylab="Within groups sum of squares")
  invisible(wss)
}


# Standardise the expenditure columns (column 1, Year, is excluded) for the
# k-means analysis. nhe.data.final is built earlier in this script rather than
# shipped with a package, so it is used directly; a data() call would only
# warn that the data set is not found.
df <- scale(nhe.data.final[,-1])
head(df)
##      Out of Pocket Private Health Insurance Public Health activity
## 1960     -1.105333               -0.9076818             -0.9389088
## 1961     -1.101735               -0.9055781             -0.9374349
## 1962     -1.093232               -0.9032968             -0.9356119
## 1963     -1.082935               -0.9007526             -0.9335561
## 1964     -1.067228               -0.8971533             -0.9311125
## 1965     -1.054807               -0.8938071             -0.9292119
##      Investment Total CMS programs Total Hospital Espenditure
## 1960 -1.0079907         -0.8710279                 -0.9819176
## 1961 -1.0023062         -0.8710279                 -0.9790171
## 1962 -0.9902482         -0.8710279                 -0.9766183
## 1963 -0.9843341         -0.8710279                 -0.9726814
## 1964 -0.9727354         -0.8710279                 -0.9690411
## 1965 -0.9659026         -0.8710279                 -0.9652177
##      Total Physician and Clinical Expenditure
## 1960                               -0.9500581
## 1961                               -0.9488910
## 1962                               -0.9466118
## 1963                               -0.9421030
## 1964                               -0.9364106
## 1965                               -0.9337791
##      Total Home Health care Expenditure
## 1960                         -0.8311797
## 1961                         -0.8310176
## 1962                         -0.8308555
## 1963                         -0.8306934
## 1964                         -0.8304502
## 1965                         -0.8298829
##      Other Non-Durable medical Products Expenditure
## 1960                                      -1.106461
## 1961                                      -1.098308
## 1962                                      -1.090213
## 1963                                      -1.087672
## 1964                                      -1.079459
## 1965                                      -1.071897
##      total prescription drug Expenditure
## 1960                          -0.7811601
## 1961                          -0.7806971
## 1962                          -0.7772683
## 1963                          -0.7758351
## 1964                          -0.7737624
## 1965                          -0.7697053
##      total durable medical equipment expenditure
## 1960                                  -0.9823951
## 1961                                  -0.9804961
## 1962                                  -0.9694820
## 1963                                  -0.9701656
## 1964                                  -0.9623418
## 1965                                  -0.9546699
##      Total Nursing care facilities and Continuing Care
## 1960                                        -0.9867131
## 1961                                        -0.9861083
## 1962                                        -0.9854026
## 1963                                        -0.9827210
## 1964                                        -0.9795152
## 1965                                        -0.9746763
##      Total Other health,Residential,and Personal care expenditure
## 1960                                                   -0.8466885
## 1961                                                   -0.8451917
## 1962                                                   -0.8444321
## 1963                                                   -0.8432704
## 1964                                                   -0.8419077
## 1965                                                   -0.8405672
##      Net cost of Health Insurance expenditure
## 1960                               -0.8247760
## 1961                               -0.8232873
## 1962                               -0.8212031
## 1963                               -0.8204960
## 1964                               -0.8181885
## 1965                               -0.8138898
# Draw the within-groups sum of squares curve for 1 to 15 clusters
wssplot(df)

# Let NbClust vote on the number of clusters again, this time with k-means
# as the clustering method, for every cluster count from 2 to 15.

# Prompt before each new plot page so the diagnostic plots can be inspected
devAskNewPage(ask = TRUE)
# Fix the seed immediately before the k-means based search
set.seed(1234)
nc <- NbClust(
  data = df,
  min.nc = 2,
  max.nc = 15,
  method = "kmeans"
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 5 proposed 2 as the best number of clusters 
## * 9 proposed 3 as the best number of clusters 
## * 1 proposed 5 as the best number of clusters 
## * 1 proposed 6 as the best number of clusters 
## * 1 proposed 7 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 1 proposed 10 as the best number of clusters 
## * 5 proposed 13 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************
# Tally how many indices proposed each number of clusters
table(nc$Best.n[1,])
## 
##  0  2  3  5  6  7  9 10 13 
##  2  5  9  1  1  1  1  1  5
# Bar chart of the same tally: one bar per proposed number of clusters
barplot(table(nc$Best.n[1,]), xlab="Number of Clusters", ylab="Number of Criteria", main="number of clusters chosen by 26 Criteria")




# Fit the three-cluster k-means solution on the standardised data. The seed is
# fixed first so the same solution is obtained on every run. Then report how
# many years fall into each cluster.
set.seed(1234)
fit.km <- kmeans(df, centers = 3)
fit.km$size
## [1] 29 11 14
# Cluster centres, expressed in the standardised units of df
fit.km$centers
##   Out of Pocket Private Health Insurance Public Health activity Investment
## 1    -0.7853660               -0.7587379             -0.7789483 -0.7697399
## 2     1.6079349                1.7020635              1.6487423  1.6696562
## 3     0.3634521                0.2343357              0.3180954  0.2825886
##   Total CMS programs Total Hospital Espenditure
## 1         -0.7333043                 -0.7567664
## 2          1.7088293                  1.6659210
## 3          0.1763359                  0.2586495
##   Total Physician and Clinical Expenditure
## 1                               -0.7754078
## 2                                1.6616313
## 3                                0.3006344
##   Total Home Health care Expenditure
## 1                         -0.7581139
## 2                          1.6500245
## 3                          0.2739310
##   Other Non-Durable medical Products Expenditure
## 1                                     -0.7823971
## 2                                      1.5909020
## 3                                      0.3706854
##   total prescription drug Expenditure
## 1                         -0.69801013
## 2                          1.78321454
## 3                          0.04478099
##   total durable medical equipment expenditure
## 1                                  -0.7840468
## 2                                   1.6215816
## 3                                   0.3499972
##   Total Nursing care facilities and Continuing Care
## 1                                        -0.7853399
## 2                                         1.6247009
## 3                                         0.3502248
##   Total Other health,Residential,and Personal care expenditure
## 1                                                   -0.7320088
## 2                                                    1.7134443
## 3                                                    0.1700263
##   Net cost of Health Insurance expenditure
## 1                              -0.71108826
## 2                               1.78410570
## 3                               0.07117121
# Mean of every expenditure column (Year excluded) within each k-means
# cluster, in the original units.
aggregate(nhe.data.final[,-1], by=list(cluster=fit.km$cluster), mean)
##   cluster Out of Pocket Private Health Insurance Public Health activity
## 1       1      46051.24                 51091.03                4495.00
## 2       2     292888.00                799721.27               67084.18
## 3       3     164536.36                353206.00               32778.29
##   Investment Total CMS programs Total Hospital Espenditure
## 1    15014.0           43501.86                   70463.66
## 2   142466.3          814882.91                  731990.45
## 3    69995.5          330824.07                  347728.00
##   Total Physician and Clinical Expenditure
## 1                                 37354.38
## 2                                480030.55
## 3                                232812.14
##   Total Home Health care Expenditure
## 1                           1859.897
## 2                          61280.636
## 3                          27325.571
##   Other Non-Durable medical Products Expenditure
## 1                                       7110.724
## 2                                      47278.364
## 3                                      26626.429
##   total prescription drug Expenditure
## 1                            10218.07
## 2                           235276.09
## 3                            77592.50
##   total durable medical equipment expenditure
## 1                                    3351.241
## 2                                   35021.182
## 3                                   18280.857
##   Total Nursing care facilities and Continuing Care
## 1                                          10798.66
## 2                                         130331.18
## 3                                          67120.07
##   Total Other health,Residential,and Personal care expenditure
## 1                                                      5584.31
## 2                                                    115048.09
## 3                                                     45961.36
##   Net cost of Health Insurance expenditure
## 1                                  7122.31
## 2                                141208.09
## 3                                 49159.07
# PAM: partition the years around three medoids. stand=TRUE asks pam() to
# standardise the columns before computing dissimilarities.
# NOTE(review): the whole matrix is passed, so the Year column takes part in
# the clustering here, whereas the hierarchical and k-means analyses drop it
# with [,-1] - confirm this is intended.
# NOTE(review): the seed is presumably not needed for pam() - confirm.

set.seed(1234)
fit.pam <- pam(nhe.data.final, k=3, stand=TRUE)

# The medoids are actual rows (years) of the data, one per cluster
fit.pam$medoids
##      Year Out of Pocket Private Health Insurance Public Health activity
## 1973 1973         31716                    22884                   2123
## 1994 1994        143375                   309592                  29591
## 2008 2008        300870                   808027                  71516
##      Investment Total CMS programs Total Hospital Espenditure
## 1973      10244              20153                      37920
## 1994      63926             302084                     328366
## 2008     155264             821951                     728949
##      Total Physician and Clinical Expenditure
## 1973                                    19559
## 1994                                   212178
## 2008                                   486463
##      Total Home Health care Expenditure
## 1973                                276
## 1994                              27375
## 2008                              62291
##      Other Non-Durable medical Products Expenditure
## 1973                                           3983
## 1994                                          24310
## 2008                                          49472
##      total prescription drug Expenditure
## 1973                                6817
## 1994                               53059
## 2008                              242724
##      total durable medical equipment expenditure
## 1973                                        2247
## 1994                                       15314
## 2008                                       34897
##      Total Nursing care facilities and Continuing Care
## 1973                                              5974
## 1994                                             58634
## 2008                                            132570
##      Total Other health,Residential,and Personal care expenditure
## 1973                                                         2019
## 1994                                                        37963
## 2008                                                       113549
##      Net cost of Health Insurance expenditure
## 1973                                     3715
## 1994                                    45100
## 2008                                   140651
# Draw the PAM clusters in a two-dimensional cluster plot
clusplot(fit.pam, main="Bivariate Cluster Plot")

Conclusion

  1. Partitioning clustering by k-means and PAM shows clustering similar to that of agglomerative hierarchical clustering.

  2. Based on the clustering, the next analyses should find out what causes the different rates of increase among the variables, such as prescription drug expenditure.

#ct.pam <- table(nhe.data.final$Year, fit.pam$clustering)

#randIndex(ct.pam)



#NHE2013_df <- read.csv("~/IS 607/FINAL PROJECT/607FinalProject/DATA_NHE2013.csv",header=T)

#str(NHE2013_df)
#s1_NHE2013 <- NHE2013_df[,C(3,4)]

#View(DATA_NHE2013)
#summary(NHE2013_df)

#str(s1_NHE2013)

#s1_NHE2013 <- data.frame(DATA_NHE2013(,C[1,3,4,5]))

#head(s1_NHE2013)

#d <- dist(DATA_NHE2013)
#as.matrix(d)[3:5, 1:5]