Sys.setenv(JAVA_HOME="C:\\Program Files\\Java\\jdk1.8.0_45\\jre")
#Sys.setenv(JAVA_HOME="C:\\Users\\jamey\\Documents\\R\\win-library\\3.1\\rJava\\libs\\x64\\rJava.dll")
library(rJava)
## Warning: package 'rJava' was built under R version 3.1.3
library(xlsx)
## Warning: package 'xlsx' was built under R version 3.1.3
## Loading required package: xlsxjars
## Warning: package 'xlsxjars' was built under R version 3.1.3
library(rvest)
## Warning: package 'rvest' was built under R version 3.1.3
library(plyr)
## Warning: package 'plyr' was built under R version 3.1.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.3
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages("cluster")
library(cluster)
#install.packages("NbClust")
library(NbClust)
## Warning: package 'NbClust' was built under R version 3.1.3
#install.packages("flexclust")
library(flexclust)
## Warning: package 'flexclust' was built under R version 3.1.3
## Loading required package: grid
## Loading required package: lattice
## Loading required package: modeltools
## Warning: package 'modeltools' was built under R version 3.1.3
## Loading required package: stats4
##
## Attaching package: 'modeltools'
##
## The following object is masked from 'package:plyr':
##
## empty
##
## The following object is masked from 'package:rJava':
##
## clone
#install.packages("fMultivar")
library(fMultivar)
## Warning: package 'fMultivar' was built under R version 3.1.3
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.1.3
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.1.3
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.1.3
##
##
## Rmetrics Package fBasics
## Analysing Markets and calculating Basic Statistics
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
##
## Attaching package: 'fBasics'
##
## The following object is masked from 'package:flexclust':
##
## getModel
##
## The following object is masked from 'package:modeltools':
##
## getModel
##
##
##
## Rmetrics Package fMultivar
## Analysing and Modeling Multivariate Financial Return Distributions
## Copyright (C) 2005-2014 Rmetrics Association Zurich
## Educational Software for Financial Engineering and Computational Science
## Rmetrics is free software and comes with ABSOLUTELY NO WARRANTY.
## https://www.rmetrics.org --- Mail to: info@rmetrics.org
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
#install.packages("rattle")
library(rattle)
## Warning: package 'rattle' was built under R version 3.1.3
## Rattle: A free graphical interface for data mining with R.
## Version 3.4.1 Copyright (c) 2006-2014 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
#url.nhe <- "http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/NationalHealthExpendData/Downloads/NHE2013.zip"
#td <- tempdir()
#tf <- tempfile(tmpdir=td, fileext=".zip")
#download.file(url.nhe, tf)
# Read the selected rows of the NHE workbook into a data frame.
# NOTE(review): the xlsx documentation says startRow is only honoured when
# rowIndex is NULL, so startRow = 2 is presumably ignored here -- confirm.
nhe.data <- read.xlsx(
  "~/IS 607/FINAL PROJECT/NHE2013.xls",
  stringsAsFactors = FALSE, header = FALSE, colClasses = "integer",
  startRow = 2,
  rowIndex = c(2, 4, 6, 38, 32, 35, 103, 133, 223, 253, 283, 313, 343, 373, 493),
  colIndex = NULL, as.data.frame = TRUE, sheetIndex = 1
)
#unlink the file handle
#unlink(tf)
# Transpose so that rows become columns and columns become rows
# (the result is a numeric matrix, see the str() output below).
nhe.data.final <- t(nhe.data)
# View() needs an interactive session; skip it under Rscript / knitting.
if (interactive()) {
  View(nhe.data.final)
}
str(nhe.data.final)
## num [1:55, 1:15] NA 1960 1961 1962 1963 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:55] "X1" "X2" "X3" "X4" ...
## ..$ : NULL
# Drop the auto-generated row names ("X1", "X2", ...)
rownames(nhe.data.final) <- NULL
#Make the row 1 as the column names
#colnames(nhe.data.final) <- nhe.data.final[1,]
# Delete the first row (its Year entry is NA in the str() output above)
nhe.data.final <- nhe.data.final[-1, ]
# Name the columns, then use the Year column as the row names
colnames(nhe.data.final) <- c(
  "Year",
  "Out of Pocket",
  "Private Health Insurance",
  "Public Health activity",
  "Investment",
  "Total CMS programs",
  "Total Hospital Espenditure",
  "Total Physician and Clinical Expenditure",
  "Total Home Health care Expenditure",
  "Other Non-Durable medical Products Expenditure",
  "total prescription drug Expenditure",
  "total durable medical equipment expenditure",
  "Total Nursing care facilities and Continuing Care",
  "Total Other health,Residential,and Personal care expenditure",
  "Net cost of Health Insurance expenditure"
)
rownames(nhe.data.final) <- nhe.data.final[, 1]
#Convert char to number column 2:16
#nhe.data.final<- colClasses("Integer")
head(nhe.data.final)
## Year Out of Pocket Private Health Insurance Public Health activity
## 1960 1960 13051 5779 371
## 1961 1961 13422 6419 409
## 1962 1962 14299 7113 456
## 1963 1963 15361 7887 509
## 1964 1964 16981 8982 572
## 1965 1965 18262 10000 621
## Investment Total CMS programs Total Hospital Espenditure
## 1960 2566 0 8985
## 1961 2863 0 9777
## 1962 3493 0 10432
## 1963 3802 0 11507
## 1964 4408 0 12501
## 1965 4765 0 13545
## Total Physician and Clinical Expenditure
## 1960 5630
## 1961 5842
## 1962 6256
## 1963 7075
## 1964 8109
## 1965 8587
## Total Home Health care Expenditure
## 1960 57
## 1961 61
## 1962 65
## 1963 69
## 1964 75
## 1965 89
## Other Non-Durable medical Products Expenditure
## 1960 1626
## 1961 1764
## 1962 1901
## 1963 1944
## 1964 2083
## 1965 2211
## total prescription drug Expenditure
## 1960 2676
## 1961 2718
## 1962 3029
## 1963 3159
## 1964 3347
## 1965 3715
## total durable medical equipment expenditure
## 1960 740
## 1961 765
## 1962 910
## 1963 901
## 1964 1004
## 1965 1105
## Total Nursing care facilities and Continuing Care
## 1960 811
## 1961 841
## 1962 876
## 1963 1009
## 1964 1168
## 1965 1408
## Total Other health,Residential,and Personal care expenditure
## 1960 451
## 1961 518
## 1962 552
## 1963 604
## 1964 665
## 1965 725
## Net cost of Health Insurance expenditure
## 1960 1013
## 1961 1093
## 1962 1205
## 1963 1243
## 1964 1367
## 1965 1598
# Inspect the cleaned matrix: its structure first ...
str(nhe.data.final)
## num [1:54, 1:15] 1960 1961 1962 1963 1964 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:54] "1960" "1961" "1962" "1963" ...
## ..$ : chr [1:15] "Year" "Out of Pocket" "Private Health Insurance" "Public Health activity" ...
# ... then the per-column minimum, quartiles, mean and maximum.
summary(nhe.data.final)
## Year Out of Pocket Private Health Insurance
## Min. :1960 Min. : 13051 Min. : 5779
## 1st Qu.:1973 1st Qu.: 32404 1st Qu.: 23656
## Median :1986 Median :107098 Median :142618
## Mean :1986 Mean :127051 Mean :281916
## 3rd Qu.:2000 3rd Qu.:198633 3rd Qu.:449874
## Max. :2013 Max. :339422 Max. :961741
## Public Health activity Investment Total CMS programs
## Min. : 371 Min. : 2566 Min. : 0
## 1st Qu.: 2238 1st Qu.: 10491 1st Qu.: 21240
## Median :12980 Median : 34249 Median : 127816
## Mean :24577 Mean : 55231 Mean : 275126
## 3rd Qu.:42468 3rd Qu.: 87416 3rd Qu.: 420850
## Max. :75480 Max. :164594 Max. :1048584
## Total Hospital Espenditure Total Physician and Clinical Expenditure
## Min. : 8985 Min. : 5630
## 1st Qu.: 39475 1st Qu.: 20223
## Median :182587 Median :106811
## Mean :277103 Mean :178203
## 3rd Qu.:410054 3rd Qu.:286132
## Max. :936867 Max. :586675
## Total Home Health care Expenditure
## Min. : 57.0
## 1st Qu.: 312.8
## Median : 6524.0
## Mean :20566.3
## 3rd Qu.:35471.5
## Max. :79772.0
## Other Non-Durable medical Products Expenditure
## Min. : 1626
## 1st Qu.: 4110
## Median :17664
## Mean :20353
## 3rd Qu.:31332
## Max. :55892
## total prescription drug Expenditure
## Min. : 2676
## 1st Qu.: 6968
## Median : 25589
## Mean : 73531
## 3rd Qu.:117070
## Max. :271096
## total durable medical equipment expenditure
## Min. : 740
## 1st Qu.: 2312
## Median : 8766
## Mean :13673
## 3rd Qu.:24597
## Max. :42987
## Total Nursing care facilities and Continuing Care
## Min. : 811
## 1st Qu.: 6212
## Median : 29722
## Mean : 49750
## 3rd Qu.: 84038
## Max. :155829
## Total Other health,Residential,and Personal care expenditure
## Min. : 451
## 1st Qu.: 2116
## Median : 16638
## Mean : 38351
## 3rd Qu.: 63327
## Max. :148230
## Net cost of Health Insurance expenditure
## Min. : 1013
## 1st Qu.: 3396
## Median : 20142
## Mean : 45335
## 3rd Qu.: 62149
## Max. :173615
Agglomerative hierarchical clustering: each year starts as its own cluster. Clusters are then combined two at a time until all clusters are merged into a single cluster.
What are the similarities and differences in healthcare expenditures from 1960 through 2013, based on the 14 expense categories?
Can we group certain years into subgroups in which the expenditures are meaningfully clustered?
# Hierarchical clustering of the years on their standardized expenditures.
# nhe.data.final is built earlier in this script, not shipped with a package,
# so the former data(nhe.data.final, package="flexclust") call only raised a
# "data set not found" warning and has been removed.
row.names(nhe.data.final) <- tolower(row.names(nhe.data.final))
# The variables vary in range. scale() standardizes each column to a mean of
# zero and a standard deviation of one: (x - mean(x)) / sd(x).
# The Year column (column 1) is left out of the clustering.
nhe.scaled <- scale(nhe.data.final[, -1])
# dist() defaults to Euclidean distances between rows (years).
d <- dist(nhe.scaled)
fit.average <- hclust(d, method = "average")
fit.single <- hclust(d, method = "single")
# For average linkage, the height dimension indicates the average distance
# between each point in one cluster and each point in the other cluster.
plot(fit.average, hang = -1, cex.axis = 2, main = "Average Linkage Clustering")
plot(fit.single, hang = -1, cex.axis = 2, main = "Single Linkage Clustering")
Conclusion
1. The variables are similar in that they all increase over time; they differ in their rates of increase.
2. From the plot, the years fall into three clusters: 1960-1988, 1989-2004, and 2005-2013.
# Select the number of clusters: NbClust evaluates its indices on the scaled
# data for 2 to 15 clusters, using Euclidean distance and average linkage.
# Prompt before each new plot page so every index plot can be inspected.
devAskNewPage(ask = TRUE)
nc <- NbClust(
  nhe.scaled,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 15,
  method = "average"
)
## [1] "Frey index : No clustering structure in this data set"
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 4 proposed 2 as the best number of clusters
## * 9 proposed 3 as the best number of clusters
## * 1 proposed 4 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 3 proposed 10 as the best number of clusters
## * 1 proposed 14 as the best number of clusters
## * 3 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
# Tally how many indices proposed each number of clusters.
# The element is named Best.nc; the former nc$Best.n only worked because `$`
# partially matches list names, so the name is now spelled out in full.
table(nc$Best.nc[1, ])
##
## 0 2 3 4 9 10 14 15
## 2 4 9 1 1 3 1 3
# Obtain the final cluster solution: cut the average-linkage tree into the
# most-voted number of clusters (3).
clusters <- cutree(fit.average, k = 3)
table(clusters)
## clusters
## 1 2 3
## 29 16 9
# Describe the clusters by the median of each expenditure column
# (original units, Year column dropped).
aggregate(nhe.data.final[, -1], by = list(cluster = clusters), FUN = median)
## cluster Out of Pocket Private Health Insurance Public Health activity
## 1 1 34467 25972 2584
## 2 2 157739 354414 33600
## 3 3 300914 833076 73480
## Investment Total CMS programs Total Hospital Espenditure
## 1 11233.0 24501.0 44139.0
## 2 69985.5 361072.5 357101.5
## 3 149678.0 885775.0 776831.0
## Total Physician and Clinical Expenditure
## 1 22215
## 2 237075
## 3 503176
## Total Home Health care Expenditure
## 1 423
## 2 32648
## 3 67249
## Other Non-Durable medical Products Expenditure
## 1 4491.0
## 2 26792.5
## 3 50328.0
## total prescription drug Expenditure
## 1 7422
## 2 72879
## 3 254981
## total durable medical equipment expenditure
## 1 2506
## 2 18318
## 3 35022
## Total Nursing care facilities and Continuing Care
## 1 6928
## 2 71980
## 3 138546
## Total Other health,Residential,and Personal care expenditure
## 1 2409
## 2 48464
## 3 122474
## Net cost of Health Insurance expenditure
## 1 3418.0
## 2 48947.5
## 3 142607.0
# Cluster medians again, this time in standardized units.
aggregate(
  as.data.frame(nhe.scaled),
  by = list(cluster = clusters),
  FUN = median
)
## cluster Out of Pocket Private Health Insurance Public Health activity
## 1 1 -0.8976855 -0.8413059 -0.8530716
## 2 2 0.2975457 0.2383065 0.3499678
## 3 3 1.6857541 1.8117029 1.8968215
## Investment Total CMS programs Total Hospital Espenditure
## 1 -0.8421071 -0.7934596 -0.8531743
## 2 0.2823972 0.2721001 0.2929778
## 3 1.8076864 1.9332682 1.8301391
## Total Physician and Clinical Expenditure
## 1 -0.8587537
## 2 0.3241024
## 3 1.7890526
## Total Home Health care Expenditure
## 1 -0.8163469
## 2 0.4896325
## 3 1.8919038
## Other Non-Durable medical Products Expenditure
## 1 -0.9371832
## 2 0.3804977
## 3 1.7710893
## total prescription drug Expenditure
## 1 -0.728836296
## 2 -0.007184509
## 3 2.000457663
## total durable medical equipment expenditure
## 1 -0.8482508
## 2 0.3528186
## 3 1.6216437
## Total Nursing care facilities and Continuing Care
## 1 -0.8633809
## 2 0.4482117
## 3 1.7903298
## Total Other health,Residential,and Personal care expenditure
## 1 -0.8029462
## 2 0.2259361
## 3 1.8793413
## Net cost of Health Insurance expenditure
## 1 -0.78002155
## 2 0.06723409
## 3 1.81013791
# Redraw the average-linkage dendrogram and box the three-cluster solution.
plot(
  fit.average,
  hang = -1,
  cex = 0.8,
  main = "Average Linkage Clustering\n3 Cluster Solution"
)
rect.hclust(fit.average, k = 3)
Partitioning around medoids (PAM): once k, the number of clusters sought, has been specified, the observations are randomly divided into k groups and then reshuffled to form cohesive clusters.
# k-means clustering
# Plot the within-groups sum of squares against the number of k-means
# clusters (an "elbow" plot) to help choose a cluster count.
#
# Args:
#   data: numeric matrix or data frame, one observation per row.
#   nc:   largest number of clusters to try; must be at least 1 (default 15).
#   seed: RNG seed set before every kmeans() call so the curve is repeatable.
#
# Returns:
#   Invisibly, the numeric vector of within-groups sums of squares for
#   1..nc clusters. The function is called mainly for the plot it draws.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1, nc >= 1)
  wss <- numeric(nc)
  # With a single cluster the within-groups SS is the sum of squared
  # deviations from the column means, i.e. (n - 1) * sum of column variances.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))
  # seq_len(nc)[-1] is empty when nc is 1, whereas 2:nc would count down.
  for (i in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[i] <- sum(kmeans(data, centers = i)$withinss)
  }
  plot(seq_len(nc), wss, type = "b", xlab = "Number of Clusters",
       ylab = "Within groups sum of squares")
  invisible(wss)
}
# Standardize the expenditure columns (Year, column 1, dropped) for k-means.
# nhe.data.final is built earlier in this script rather than shipped as a
# packaged data set, so the former data(nhe.data.final) call only raised a
# "data set not found" warning and has been removed.
df <- scale(nhe.data.final[, -1])
head(df)
## Out of Pocket Private Health Insurance Public Health activity
## 1960 -1.105333 -0.9076818 -0.9389088
## 1961 -1.101735 -0.9055781 -0.9374349
## 1962 -1.093232 -0.9032968 -0.9356119
## 1963 -1.082935 -0.9007526 -0.9335561
## 1964 -1.067228 -0.8971533 -0.9311125
## 1965 -1.054807 -0.8938071 -0.9292119
## Investment Total CMS programs Total Hospital Espenditure
## 1960 -1.0079907 -0.8710279 -0.9819176
## 1961 -1.0023062 -0.8710279 -0.9790171
## 1962 -0.9902482 -0.8710279 -0.9766183
## 1963 -0.9843341 -0.8710279 -0.9726814
## 1964 -0.9727354 -0.8710279 -0.9690411
## 1965 -0.9659026 -0.8710279 -0.9652177
## Total Physician and Clinical Expenditure
## 1960 -0.9500581
## 1961 -0.9488910
## 1962 -0.9466118
## 1963 -0.9421030
## 1964 -0.9364106
## 1965 -0.9337791
## Total Home Health care Expenditure
## 1960 -0.8311797
## 1961 -0.8310176
## 1962 -0.8308555
## 1963 -0.8306934
## 1964 -0.8304502
## 1965 -0.8298829
## Other Non-Durable medical Products Expenditure
## 1960 -1.106461
## 1961 -1.098308
## 1962 -1.090213
## 1963 -1.087672
## 1964 -1.079459
## 1965 -1.071897
## total prescription drug Expenditure
## 1960 -0.7811601
## 1961 -0.7806971
## 1962 -0.7772683
## 1963 -0.7758351
## 1964 -0.7737624
## 1965 -0.7697053
## total durable medical equipment expenditure
## 1960 -0.9823951
## 1961 -0.9804961
## 1962 -0.9694820
## 1963 -0.9701656
## 1964 -0.9623418
## 1965 -0.9546699
## Total Nursing care facilities and Continuing Care
## 1960 -0.9867131
## 1961 -0.9861083
## 1962 -0.9854026
## 1963 -0.9827210
## 1964 -0.9795152
## 1965 -0.9746763
## Total Other health,Residential,and Personal care expenditure
## 1960 -0.8466885
## 1961 -0.8451917
## 1962 -0.8444321
## 1963 -0.8432704
## 1964 -0.8419077
## 1965 -0.8405672
## Net cost of Health Insurance expenditure
## 1960 -0.8247760
## 1961 -0.8232873
## 1962 -0.8212031
## 1963 -0.8204960
## 1964 -0.8181885
## 1965 -0.8138898
# Elbow plot of the within-groups sum of squares for 1 to 15 clusters.
wssplot(df)
# Prompt before each new plot page, then fix the RNG seed before NbClust
# runs its k-means based indices for 2 to 15 clusters.
devAskNewPage(ask = TRUE)
set.seed(1234)
nc <- NbClust(
  df,
  min.nc = 2,
  max.nc = 15,
  method = "kmeans"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 5 proposed 2 as the best number of clusters
## * 9 proposed 3 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 1 proposed 6 as the best number of clusters
## * 1 proposed 7 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
## * 5 proposed 13 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
# Tally how many indices proposed each number of clusters.
# The element is named Best.nc; the former nc$Best.n only worked because `$`
# partially matches list names, so the name is now spelled out in full.
table(nc$Best.nc[1, ])
##
## 0 2 3 5 6 7 9 10 13
## 2 5 9 1 1 1 1 1 5
barplot(
  table(nc$Best.nc[1, ]),
  xlab = "Number of Clusters",
  ylab = "Number of Criteria",
  main = "number of clusters chosen by 26 Criteria"
)
# Fit the 3-cluster k-means solution; the seed fixes the random starting
# centers so the result is repeatable.
set.seed(1234)
fit.km <- kmeans(df, 3)
fit.km$size
## [1] 29 11 14
# Cluster centers in standardized units.
fit.km$centers
## Out of Pocket Private Health Insurance Public Health activity Investment
## 1 -0.7853660 -0.7587379 -0.7789483 -0.7697399
## 2 1.6079349 1.7020635 1.6487423 1.6696562
## 3 0.3634521 0.2343357 0.3180954 0.2825886
## Total CMS programs Total Hospital Espenditure
## 1 -0.7333043 -0.7567664
## 2 1.7088293 1.6659210
## 3 0.1763359 0.2586495
## Total Physician and Clinical Expenditure
## 1 -0.7754078
## 2 1.6616313
## 3 0.3006344
## Total Home Health care Expenditure
## 1 -0.7581139
## 2 1.6500245
## 3 0.2739310
## Other Non-Durable medical Products Expenditure
## 1 -0.7823971
## 2 1.5909020
## 3 0.3706854
## total prescription drug Expenditure
## 1 -0.69801013
## 2 1.78321454
## 3 0.04478099
## total durable medical equipment expenditure
## 1 -0.7840468
## 2 1.6215816
## 3 0.3499972
## Total Nursing care facilities and Continuing Care
## 1 -0.7853399
## 2 1.6247009
## 3 0.3502248
## Total Other health,Residential,and Personal care expenditure
## 1 -0.7320088
## 2 1.7134443
## 3 0.1700263
## Net cost of Health Insurance expenditure
## 1 -0.71108826
## 2 1.78410570
## 3 0.07117121
# Mean of each expenditure column (original units) within each k-means cluster.
aggregate(
  nhe.data.final[, -1],
  by = list(cluster = fit.km$cluster),
  FUN = mean
)
## cluster Out of Pocket Private Health Insurance Public Health activity
## 1 1 46051.24 51091.03 4495.00
## 2 2 292888.00 799721.27 67084.18
## 3 3 164536.36 353206.00 32778.29
## Investment Total CMS programs Total Hospital Espenditure
## 1 15014.0 43501.86 70463.66
## 2 142466.3 814882.91 731990.45
## 3 69995.5 330824.07 347728.00
## Total Physician and Clinical Expenditure
## 1 37354.38
## 2 480030.55
## 3 232812.14
## Total Home Health care Expenditure
## 1 1859.897
## 2 61280.636
## 3 27325.571
## Other Non-Durable medical Products Expenditure
## 1 7110.724
## 2 47278.364
## 3 26626.429
## total prescription drug Expenditure
## 1 10218.07
## 2 235276.09
## 3 77592.50
## total durable medical equipment expenditure
## 1 3351.241
## 2 35021.182
## 3 18280.857
## Total Nursing care facilities and Continuing Care
## 1 10798.66
## 2 130331.18
## 3 67120.07
## Total Other health,Residential,and Personal care expenditure
## 1 5584.31
## 2 115048.09
## 3 45961.36
## Net cost of Health Insurance expenditure
## 1 7122.31
## 2 141208.09
## 3 49159.07
# PAM: partition the years around 3 medoids, standardizing the columns first
# (stand = TRUE).
# NOTE(review): the full matrix, including the Year column, is clustered here,
# whereas the hclust and k-means steps drop Year with [, -1] -- confirm that
# this difference is intended.
set.seed(1234)
fit.pam <- pam(
  nhe.data.final,
  k = 3,
  stand = TRUE
)
fit.pam$medoids
## Year Out of Pocket Private Health Insurance Public Health activity
## 1973 1973 31716 22884 2123
## 1994 1994 143375 309592 29591
## 2008 2008 300870 808027 71516
## Investment Total CMS programs Total Hospital Espenditure
## 1973 10244 20153 37920
## 1994 63926 302084 328366
## 2008 155264 821951 728949
## Total Physician and Clinical Expenditure
## 1973 19559
## 1994 212178
## 2008 486463
## Total Home Health care Expenditure
## 1973 276
## 1994 27375
## 2008 62291
## Other Non-Durable medical Products Expenditure
## 1973 3983
## 1994 24310
## 2008 49472
## total prescription drug Expenditure
## 1973 6817
## 1994 53059
## 2008 242724
## total durable medical equipment expenditure
## 1973 2247
## 1994 15314
## 2008 34897
## Total Nursing care facilities and Continuing Care
## 1973 5974
## 1994 58634
## 2008 132570
## Total Other health,Residential,and Personal care expenditure
## 1973 2019
## 1994 37963
## 2008 113549
## Net cost of Health Insurance expenditure
## 1973 3715
## 1994 45100
## 2008 140651
# Draw the cluster plot for the PAM solution.
clusplot(fit.pam, main="Bivariate Cluster Plot")
Conclusion
Partitioning clustering by k-means and PAM shows clustering similar to that of agglomerative hierarchical clustering.
Based on the clustering, the next analyses should find out what causes the different rates of increase among the variables, such as prescription drug expenditure.
#ct.pam <- table(nhe.data.final$Year, fit.pam$clustering)
#randIndex(ct.pam)
#NHE2013_df <- read.csv("~/IS 607/FINAL PROJECT/607FinalProject/DATA_NHE2013.csv",header=T)
#str(NHE2013_df)
#s1_NHE2013 <- NHE2013_df[,C(3,4)]
#View(DATA_NHE2013)
#summary(NHE2013_df)
#str(s1_NHE2013)
#s1_NHE2013 <- data.frame(DATA_NHE2013(,C[1,3,4,5]))
#head(s1_NHE2013)
#d <- dist(DATA_NHE2013)
#as.matrix(d)[3:5, 1:5]