library(readr)
# Location of the input CSV. The original absolute path is kept as the default
# so existing runs behave the same; set the US2016HEALTH_CSV environment
# variable to point the script at the file on another machine.
health_csv_path <- Sys.getenv(
  "US2016HEALTH_CSV",
  unset = "C:/Users/AHMED/Desktop/US2016Health.csv"
)

# Fail early with an explicit message rather than a parser error further down.
if (!file.exists(health_csv_path)) {
  stop("Input file not found: ", health_csv_path, call. = FALSE)
}

# Read the file, taking column names from the first row.
d_csv <- read_csv(health_csv_path, col_names = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## Geo_FIPS = col_integer(),
## Geo_NAME = col_character(),
## Geo_QNAME = col_character(),
## Geo_STATE = col_integer(),
## Geo_COUNTY = col_integer(),
## SE_NV002_001 = col_integer(),
## SE_T007_001 = col_integer(),
## SE_T008_001 = col_integer(),
## SE_T008_002 = col_integer(),
## SE_T008_003 = col_integer(),
## SE_T008_004 = col_integer(),
## SE_NV007_001 = col_character(),
## SE_NV007_002 = col_integer(),
## SE_T010_001 = col_integer(),
## SE_T010_002 = col_integer(),
## SE_T010_003 = col_integer()
## )
## See spec(...) for full column specifications.
# Preview the first rows of the imported data (6 is head()'s default count).
head(d_csv, n = 6L)
## # A tibble: 1 × 32
## Geo_FIPS Geo_NAME Geo_QNAME Geo_STATE Geo_COUNTY
## <int> <chr> <chr> <int> <int>
## 1 36061 New York County New York County, New York 36 61
## # ... with 27 more variables: SE_T001_001 <dbl>, SE_T001_002 <dbl>,
## # SE_T002_001 <dbl>, SE_T003_001 <dbl>, SE_NV002_001 <int>,
## # SE_T007_001 <int>, SE_T007_002 <dbl>, SE_T008_001 <int>,
## # SE_T008_002 <int>, SE_T008_003 <int>, SE_T008_004 <int>,
## # SE_NV006_001 <dbl>, SE_NV006_002 <dbl>, SE_NV006_003 <dbl>,
## # SE_NV006_004 <dbl>, SE_T009_001 <dbl>, SE_T009_002 <dbl>,
## # SE_NV007_001 <chr>, SE_NV007_002 <int>, SE_T010_001 <int>,
## # SE_T010_002 <int>, SE_T010_003 <int>, SE_NV008_001 <dbl>,
## # SE_NV008_002 <dbl>, SE_NV008_003 <dbl>, SE_T011_001 <dbl>,
## # SE_T011_002 <dbl>
# Columns of d_csv carried forward into the analysis, in output order.
health_cols <- c(
  "SE_T001_001", "SE_T003_001", "SE_T007_001", "SE_T008_001",
  "SE_T008_002", "SE_T011_001", "SE_T011_002", "SE_T010_002"
)

# Keep only those columns (all rows retained), then inspect the structure.
Health2016 <- subset(d_csv, select = health_cols)
str(Health2016)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 8 variables:
## $ SE_T001_001: num 3.5
## $ SE_T003_001: num 8.62
## $ SE_T007_001: int 12134
## $ SE_T008_001: int 550
## $ SE_T008_002: int 415
## $ SE_T011_001: num 13.2
## $ SE_T011_002: num 21.3
## $ SE_T010_002: int 11426
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Replace the coded column names with readable ones (new_name = old_name).
# NOTE(review): "Chlamyida" looks like a misspelling of "Chlamydia"; it is kept
# as-is because later code may refer to the column by this exact name.
Health_2016 <- dplyr::rename(
  Health2016,
  unhealthyDays = SE_T001_001,
  lowBirth = SE_T003_001,
  premDeath = SE_T007_001,
  infantMo = SE_T008_001,
  childMo = SE_T008_002,
  currentSmok = SE_T011_001,
  drinkAdult = SE_T011_002,
  Chlamyida = SE_T010_002
)

# Confirm the new names and column types.
str(Health_2016)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 8 variables:
## $ unhealthyDays: num 3.5
## $ lowBirth : num 8.62
## $ premDeath : int 12134
## $ infantMo : int 550
## $ childMo : int 415
## $ currentSmok : num 13.2
## $ drinkAdult : num 21.3
## $ Chlamyida : int 11426
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(dplyr)
# Reshape to long format: the infantMo and childMo columns are stacked into a
# key column "Mort" (holding the former column name) and a value column
# "Mort_Count". All other columns are repeated for each stacked row.
#
# pivot_longer() replaces the superseded gather(); it needs tidyr >= 1.0.0.
# The resulting columns are the same. With several input rows the row order
# differs (pivot_longer keeps each input row's values together), which does
# not affect the plots below.
CombinedMort1 <- pivot_longer(
  Health_2016,
  cols = c(infantMo, childMo),
  names_to = "Mort",
  values_to = "Mort_Count"
)
head(CombinedMort1)
## # A tibble: 2 × 8
## unhealthyDays lowBirth premDeath currentSmok drinkAdult Chlamyida
## <dbl> <dbl> <int> <dbl> <dbl> <int>
## 1 3.5 8.624041 12134 13.2 21.3 11426
## 2 3.5 8.624041 12134 13.2 21.3 11426
## # ... with 2 more variables: Mort <chr>, Mort_Count <int>
# Copy of the long-format data without the unhealthyDays column.
# The call is namespaced so that a select() from another attached package
# cannot be picked up by mistake.
CombinedMort2 <- dplyr::select(CombinedMort1, -unhealthyDays)
library(ggplot2)
# Bar chart of the mortality counts, one bar per mortality type.
# geom_col() is the idiomatic form of geom_bar(stat = "identity"): bar heights
# are taken directly from Mort_Count.
# The bars are filled by Mort so that the Solarized palette below has an
# aesthetic to act on; the original colour scale had nothing mapped to it and
# therefore changed nothing in the plot.
g1 <- ggplot(
  data = CombinedMort1,
  mapping = aes(x = Mort, y = Mort_Count, fill = Mort)
) +
  geom_col()

library(ggthemes)

# Dark Solarized theme with the fill palette built around the "red" accent.
g1 +
  theme_solarized(light = FALSE) +
  scale_fill_solarized("red")

library(ggplot2)
# Point plot of the mortality counts by mortality type.
# The geom_smooth() layer was removed: Mort is a discrete variable, so each
# category forms its own group with too few distinct x values to fit a
# smoother. The layer drew nothing and only printed a method message.
ggplot(CombinedMort2, aes(x = Mort, y = Mort_Count)) +
  geom_point()
