R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print the structure of `a`: its class, dimensions, and each column's type
# together with its first few values.
str(a)
## Classes 'tbl_df', 'tbl' and 'data.frame':    1465 obs. of  7 variables:
##  $ Indicator Category: chr  "Behavioral Health/Substance Abuse" "Behavioral Health/Substance Abuse" "Behavioral Health/Substance Abuse" "Behavioral Health/Substance Abuse" ...
##  $ Indicator         : chr  "Opioid-Related Unintentional Drug Overdose Mortality Rate (Age-Adjusted; Per 100,000 people)" "Opioid-Related Unintentional Drug Overdose Mortality Rate (Age-Adjusted; Per 100,000 people)" "Opioid-Related Unintentional Drug Overdose Mortality Rate (Age-Adjusted; Per 100,000 people)" "Opioid-Related Unintentional Drug Overdose Mortality Rate (Age-Adjusted; Per 100,000 people)" ...
##  $ Year              : num  2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  $ Sex               : chr  "Both" "Both" "Both" "Both" ...
##  $ Race/Ethnicity    : chr  "All" "All" "All" "All" ...
##  $ Value             : num  1.7 2.2 2.3 3 4.4 5.2 5.4 6.3 11.3 11.8 ...
##  $ Place             : chr  "Washington, DC" "Fort Worth (Tarrant County), TX" "Oakland (Alameda County), CA" "San Antonio, TX" ...
# Preview the first rows of `a` and plot the distribution of its Year column.
head(a)
hist(a$Year)

# Jitter the years so that repeated values do not sit exactly on top of each
# other in the dot chart.
dotchart(jitter(x = a$Year))

##barplot(a$Sex)

# Cross-tabulate Sex (rows) by Year (columns) once; the bar plots and the
# mosaic plot below all reuse this table.
w <- table(a$Sex, a$Year)

# Stacked bars per year with a legend naming each Sex level. The argument is
# spelled out as `legend.text` rather than relying on partial matching, and no
# empty `main =` is passed.
barplot(w, legend.text = rownames(w))

barplot(w, main = "Comparison of Patients")

# plot() on a two-way table draws a mosaic plot. The first table dimension
# (Sex) runs along the x axis and the second (Year) along the y axis, so the
# axis labels are given in that order. Only arguments the mosaic plot accepts
# are passed: the title argument is lower-case `main`, and there is no
# `binwidth`.
plot(w, xlab = "Sex", ylab = "Years", main = "Years")

# Base plot shared by every layer below: `mpg` from `mtcars` on the x axis.
p <- ggplot(mtcars, mapping = aes(x = mpg))

# Density estimate of mpg.
p + geom_density()

# Dot plot of mpg using the default binning.
p + geom_dotplot()
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

# Binned counts drawn as a filled area, first with the default number of bins ...
p + geom_area(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# ... then with six bins.
p + geom_area(stat = "bin", bins = 6)

# Binned counts drawn as a frequency polygon.
p + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##ggplot(data=a, aes(x=value))+geom_density()

Including Plots

You can also embed plots, for example:

## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:reshape':
## 
##     rename, round_any
## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:lubridate':
## 
##     here
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
## 
##                    Baltimore, MD                       Boston, MA 
##                               19                               64 
##                    Charlotte, NC                      Chicago, Il 
##                               50                               16 
##                     Columbus, OH                       Denver, CO 
##                               98                               96 
##                      Detroit, MI  Fort Worth (Tarrant County), TX 
##                               32                               50 
##                      Houston, TX Indianapolis (Marion County), IN 
##                               46                               21 
##                  Kansas City, MO     Las Vegas (Clark County), NV 
##                               38                              100 
##                   Long Beach, CA                  Los Angeles, CA 
##                                2                               35 
##    Miami (Miami-Dade County), FL                  Minneapolis, MN 
##                               48                               54 
##                New York City, NY     Oakland (Alameda County), CA 
##                               64                               64 
##                 Philadelphia, PA                      Phoenix, AZ 
##                               63                               47 
##  Portland (Multnomah County), OR                  San Antonio, TX 
##                               42                               75 
##             San Diego County, CA                San Francisco, CA 
##                              105                               21 
##                     San Jose, CA                      Seattle, WA 
##                                1                               70 
##           U.S. Total, U.S. Total                   Washington, DC 
##                              118                               26

## Warning: Removed 153 rows containing missing values (geom_point).

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

## 
## Data:
##  [1] 2.620 2.875 2.320 3.215 3.440 3.460 3.570 3.190 3.150 3.440 3.440 4.070
## [13] 3.730 3.780 5.250 5.424 5.345 2.200 1.615 1.835 2.465 3.520 3.435 3.840
## [25] 3.845 1.935 2.140 1.513 3.170 2.770 3.570 2.780
## 
## Intervals:
##      min    max count
## 1 1.5105 3.2175    16
## 2 2.6175 3.5725    16
## 3 3.4325 5.4265    16
## 
## Overlap between adjacent intervals:
## [1] 8 8

## Classes 'tbl_df', 'tbl' and 'data.frame':    1424 obs. of  8 variables:
##  $ Indicator Category        : chr  "Life Expectancy and Death Rate (Overall)" "Life Expectancy and Death Rate (Overall)" "Life Expectancy and Death Rate (Overall)" "Life Expectancy and Death Rate (Overall)" ...
##  $ Indicator                 : chr  "All-Cause Mortality Rate (Age-Adjusted; Per 100,000 people)" "All-Cause Mortality Rate (Age-Adjusted; Per 100,000 people)" "All-Cause Mortality Rate (Age-Adjusted; Per 100,000 people)" "All-Cause Mortality Rate (Age-Adjusted; Per 100,000 people)" ...
##  $ Year                      : num  2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  $ Sex                       : chr  "Both" "Both" "Both" "Both" ...
##  $ Race/Ethnicity            : chr  "All" "All" "All" "All" ...
##  $ Value                     : num  583 606 630 674 686 ...
##  $ Place                     : chr  "San Francisco, CA" "Seattle, WA" "San Diego County, CA" "Oakland (Alameda County), CA" ...
##  $ BCHC Requested Methodology: chr  "All deaths per 100,000 population using 2010 US Census figures, age adjusted to the year 2000 standard population" "All deaths per 100,000 population using 2010 US Census figures, age adjusted to the year 2000 standard population" "All deaths per 100,000 population using 2010 US Census figures, age adjusted to the year 2000 standard population" "All deaths per 100,000 population using 2010 US Census figures, age adjusted to the year 2000 standard population" ...
## Warning: Removed 40 rows containing missing values (geom_point).

## Warning: Using size for a discrete variable is not advised.

## Warning: Using size for a discrete variable is not advised.

## All-Cause Mortality - Boston, MA
# Read sheet 7 of the Life Expectancy and Death Rate workbook. The plots below
# use its Year, Value, Sex and Race columns.
jle2 <- read_xlsx(
  "/Users/na/Desktop/Shri R Projects/Grad695 Prject/Data Feb2020/Life Expectancy and Death Rate Overall.xlsx",
  sheet = 7
)
head(jle2)

# Value against Year for every row.
ggplot(data = jle2) +
  geom_point(mapping = aes(x = Year, y = Value))

# Same scatter, coloured by Sex.
ggplot(data = jle2) +
  geom_point(mapping = aes(x = Year, y = Value, color = Sex))

# Same scatter, coloured by Race.
ggplot(data = jle2) +
  geom_point(mapping = aes(x = Year, y = Value, color = Race))

# Labelled version of the Race-coloured scatter.
# NOTE(review): the caption's end year is written as 2015 (it read "215");
# confirm the range against the data.
ggplot(data = jle2) +
  geom_point(mapping = aes(x = Year, y = Value, color = Race)) +
  labs(
    title = "All-Cause Mortality Rate for Boston, MA",
    subtitle = "All-Cause Mortality Rate (Age-Adjusted; Per 100,000 people)",
    x = "Year",
    y = "Mortality Rate",
    caption = "Data includes 2010-2015"
  ) +
  theme_classic()

## Life Expectancy - Boston, MA
# Read sheet 8 of the Life Expectancy and Death Rate workbook. The plots below
# use its Year, Value, Sex, Race and Place columns.
jle3 <- read_xlsx(
  "/Users/na/Desktop/Shri R Projects/Grad695 Prject/Data Feb2020/Life Expectancy and Death Rate Overall.xlsx",
  sheet = 8
)
head(jle3)

# Value against Year; the point size also scales with Year.
ggplot(data = jle3) +
  geom_point(mapping = aes(x = Year, y = Value, size = Year))

# Same scatter, coloured by Sex.
ggplot(data = jle3) +
  geom_point(mapping = aes(x = Year, y = Value, color = Sex))

# Same scatter, coloured by Race.
ggplot(data = jle3) +
  geom_point(mapping = aes(x = Year, y = Value, color = Race))

# One panel per Place, laid out in two rows. The point size is a fixed
# constant, so it is set outside aes(); inside aes() it would be treated as a
# data mapping and add a meaningless "5" legend.
ggplot(data = jle3) +
  geom_point(
    mapping = aes(x = Year, y = Value, shape = Race, color = Race),
    size = 5
  ) +
  facet_wrap(~Place, nrow = 2)

# Labelled version: colour by Year, size by Race.
# NOTE(review): this section is headed "Life Expectancy" but the title,
# subtitle and y label describe a mortality rate; confirm which one sheet 8
# holds. The caption's end year is written as 2015 (it read "215"); confirm
# the range against the data.
ggplot(data = jle3) +
  geom_point(mapping = aes(x = Year, y = Value, color = Year, size = Race)) +
  labs(
    title = "All-Cause Mortality Rate for Boston, MA",
    subtitle = "All-Cause Mortality Rate (Age-Adjusted; Per 100,000 people)",
    x = "Year",
    y = "Mortality Rate",
    caption = "Data includes 2010-2015"
  ) +
  theme_minimal()
## Warning: Using size for a discrete variable is not advised.

##labs(title = "Business Marketing Focus and Internet Use", subtitle = "Data to study impact of internet use", tag = "Figure for Sales at Time.2", x = "X-axis label goes here", y = "Y-axis label goes here", caption = "Long-term impact of the increase of business-to-business utilization of the Internet on the marketing theory and marketing process",
# Every Maternal and Infant Health table comes from the same workbook, so the
# path is written once and reused.
maternal_xlsx <- "/Users/na/Desktop/Shri R Projects/Grad695 Prject/Data Feb2020/Maternal and Infant Health.xlsx"

# Sheets 1-3 of the workbook.
kmu <- read_xlsx(maternal_xlsx, sheet = 1)
klb <- read_xlsx(maternal_xlsx, sheet = 2)
kim <- read_xlsx(maternal_xlsx, sheet = 3)

# `k` was read without a `sheet` argument, which read_xlsx() resolves to the
# first sheet, so it is the same table as `kmu`; reuse it instead of reading
# the file a second time.
k <- kmu

# Value against Year for `kmu`; Race sets the point colour and Year the point
# size.
ggplot(kmu, aes(x = Year, y = Value, colour = Race, size = Year)) +
  geom_point()
## Warning: Removed 148 rows containing missing values (geom_point).

# Value against Year for `klb`; Year sets the point colour and Race the point
# size, drawn with the minimal theme.
ggplot(klb, aes(x = Year, y = Value, colour = Year, size = Race)) +
  geom_point() +
  theme_minimal()
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 12 rows containing missing values (geom_point).

# Value against Year for `kim`; Race drives both the size and the colour of
# each point.
ggplot(kim, aes(x = Year, y = Value, size = Race, colour = Race)) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 15 rows containing missing values (geom_point).

# Value against Year for `kim`, with Race as point shape and Place as colour.
# The x/y mapping lives only in the point layer: a plot-level aes(x = Value)
# would be overridden by the layer's x = Year yet still label the x axis
# "Value".
# NOTE(review): Race has more levels than the default shape palette supports
# (see the warning below), so some rows are not drawn; consider
# scale_shape_manual() if every level must appear.
ggplot(kim) +
  geom_point(mapping = aes(x = Year, y = Value, shape = Race, color = Place)) +
  theme_minimal()
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 216 rows containing missing values (geom_point).

## Las Vegas Life Expectancy
# Read sheet 11 of the Life Expectancy and Death Rate workbook.
jlelvma <- read_xlsx(
  "/Users/na/Desktop/Shri R Projects/Grad695 Prject/Data Feb2020/Life Expectancy and Death Rate Overall.xlsx",
  sheet = 11
)

# Value against Year, one panel per Place; Race drives the shape, colour and
# size of each point.
# NOTE(review): this section is headed "Life Expectancy" but the title and
# y label describe a mortality rate; confirm which one sheet 11 holds. The
# caption's end year is written as 2015 (it read "215"); confirm the range
# against the data.
ggplot(data = jlelvma) +
  geom_point(
    mapping = aes(x = Year, y = Value, shape = Race, color = Race, size = Race)
  ) +
  facet_wrap(~Place) +
  labs(
    title = "All-Cause Mortality Rate for Las Vegas, NV & Boston, MA",
    subtitle = "(Age-Adjusted; Per 100,000 people)",
    x = "Year",
    y = "Mortality Rate",
    caption = "Data includes 2010-2015"
  ) +
  theme_minimal()
## Warning: Using size for a discrete variable is not advised.
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 16 rows containing missing values (geom_point).

# Smoothed trend of Value over Year, one panel and one curve per Race; Race
# also sets the line colour and size. `shape` is not mapped because
# geom_smooth() has no shape aesthetic: mapping it only produces an
# "unknown aesthetics" warning and an unused shape scale.
# NOTE(review): the caption's end year is written as 2015 (it read "215");
# confirm the range against the data. The loess warnings below indicate that
# some Race groups have very few points; a simpler method (e.g. "lm") may be
# more appropriate.
ggplot(data = jlelvma) +
  geom_smooth(
    mapping = aes(x = Year, y = Value, color = Race, size = Race, group = Race)
  ) +
  facet_wrap(~Race) +
  labs(
    title = "All-Cause Mortality Rate for Las Vegas, NV & Boston, MA",
    subtitle = "(Age-Adjusted; Per 100,000 people)",
    x = "Year",
    y = "Mortality Rate",
    caption = "Data includes 2010-2015"
  ) +
  theme_classic()
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2012
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 2012
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602

#ggplot(data=jlelvma) +
 # geom_point (mapping = aes(x= Year, y = Value, shape = Race, color = Race), blue = "red") + facet_wrap(~Place)+
  #labs(title = "All-Cause Mortality Rate for Las Vegas, NV & Boston, MA", subtitle = "(Age-Adjusted; Per 100,000 people)", x = "Year", y = "Mortality Rate", caption = "Data includes 2010-215") +
  #theme_classic()

##Imp code chunk
#ggplot(data=jlelvma) +
 # geom_smooth (mapping = aes(x= Year, y = Value, shape = Place, color = Race)) + facet_wrap(Race~Place)+
  #labs(title = "All-Cause Mortality Rate for Las Vegas, NV & Boston, MA", subtitle = "(Age-Adjusted; Per 100,000 people)", x = "Year", y = "Mortality Rate", caption = "Data includes 2010-215") +
  #theme_minimal()