Ptrojct 2

Install library

I am using DBI library for database access and tidyverse library for data manipulation.

library(DBI)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Connect to SQL Server

I will create ODBC connection object

con <- dbConnect(odbc::odbc(),
  .connection_string = "Driver={SQL Server};Server=(local);Database=tb;Trusted_Connection=Yes;")

Weight Data for Domestic Cats

144 adult (over 2kg in weight) cats used for experiments with the drug digitalis had their heart and body weight recorded. 47 of the cats were female and 97 were male. https://stat.ethz.ch/R-manual/R-devel/library/boot/html/catsM.html This data frame contains the following columns:

Sex - A factor for the sex of the cat (levels are F and M: all cases are M in this subset). Bwt - Body weight in kg. Hwt - Heart weight in g.

Source The data were obtained from Fisher, R.A. (1947) The analysis of covariance method for the relation between a part and the whole. Biometrics, 3, 65–68.

The leading causes of death by sex and ethnicity in New York City

Cause of death is derived from the NYC death certificate which is issued for every death that occurs in New York City. https://data.cityofnewyork.us/Health/New-York-City-Leading-Causes-of-Death/jb7j-dtam

###Columns in this Dataset: Year - The year of death. Leading Cause - The cause of death. Sex - The decedent’s sex. Race Ethnicity - The decedent’s ethnicity. Deaths - The number of people who died due to cause of death. Death Rate - The death rate within the sex and Race/ethnicity category Age Adjusted Death Rate - The age-adjusted death rate within the sex and Race/ethnicity category

NYC Civil Service Titles

List of Civil Service title codes and title descriptions used by City of New York agencies https://data.cityofnewyork.us/City-Government/NYC-Civil-Service-Titles/nzjr-3966

###Columns in this Dataset: Title Code - Civil Service Title Code Title Description - Name/Description of title Standard Hours - Standard weekly hours for the title Assignment Level - Assignment level within title Union Code - Unique code for union representing title, if applicable Union Description - Name/Description of union representing title, if applicable Bargaining Unit Short Name - Acronym or short name for union designated as the bargaining unit for title, if applicable Bargaining Unit Description - Name/description of union designated as the bargaining unit for title, if applicable Minimum Salary Rate - Minimum salary range for the title Maximum Salary Rate - Maximum salary range for the title

Create tables for datasets:

rs = dbSendStatement(con, "
--DROP TABLE IF EXISTS dbo.cats;
--CREATE TABLE dbo.cats(N VARCHAR(3), Sex VARCHAR(3), Bwt DECIMAL(3,1), Hwt DECIMAL(4,2));

--DROP TABLE IF EXISTS dbo.CauseOfDeath; 
--CREATE TABLE dbo.CauseOfDeath (
--[Year] INT NOT NULL,
--LeadingCause VARCHAR(130) NOT NULL,
--Sex VARCHAR(6),
--RaceEthnicity VARCHAR(40),
--Deaths VARCHAR(40),
--DeathRate VARCHAR(40),
--AgeAdjustedDeathRate VARCHAR(40));

--DROP TABLE IF EXISTS dbo.ServiceTitles;
--CREATE TABLE dbo.ServiceTitles (
--TitleCode CHAR(5) NOT NULL,
--Title_Description VARCHAR(70) NOT NULL,
--StandardHours FLOAT NOT NULL,
--Assignment_Level VARCHAR(50),
--UnionCode VARCHAR(20),
--Union_Description VARCHAR(70),
--BargainingUnitShortName VARCHAR(5),
--BargainingUnitDescription VARCHAR(70),
--MinimumSalaryRate VARCHAR(20),
--MaximumSalaryRate VARCHAR(20));
"
)
dbClearResult(rs)

Load data into tables

rs = dbSendStatement(con, "
-- BULK INSERT dbo.cats FROM 'cats.csv'
-- BULK INSERT dbo.ServiceTitles FROM 'NYC_Civil_Service_Titles.csv'
-- BULK INSERT dbo.CauseOfDeath FROM 'New_York_City_Leading_Causes_of_Death.csv' WITH (FORMAT = 'CSV');
")
dbClearResult(rs)

Query database into R

rs <- dbSendQuery(con, "
SELECT N
      ,Sex
      ,Bwt
      ,Hwt
FROM dbo.cats
")
df_cat = data.frame(dbFetch(rs))
dbClearResult(rs)
rs <- dbSendQuery(con, "
SELECT Year
      ,LeadingCause
      ,Sex
      ,RaceEthnicity
      ,Deaths
      ,DeathRate
      ,AgeAdjustedDeathRate
FROM dbo.CauseOfDeath2
")
df_c_of_d = data.frame(dbFetch(rs))
dbClearResult(rs)
rs <- dbSendQuery(con, "
SELECT TitleCode
      ,Title_Description
      ,StandardHours
      ,Assignment_Level
      ,UnionCode
      ,Union_Description
      ,BargainingUnitShortName
      ,BargainingUnitDescription
      ,MinimumSalaryRate
      ,MaximumSalaryRate
FROM dbo.ServiceTitles
")
df_tc = data.frame(dbFetch(rs))
dbClearResult(rs)

## Convert R dataframe to tibble

cats = tibble(df_cat)
CauseOfDeath = tibble(df_c_of_d)
ServiceTitles = tibble(df_tc)

Preview results

Cats

head(cats)

## # A tibble: 6 x 4
##   N     Sex     Bwt   Hwt
##   <chr> <chr> <dbl> <dbl>
## 1 1     F       2     7  
## 2 2     F       2     7.4
## 3 3     F       2     9.5
## 4 4     F       2.1   7.2
## 5 5     F       2.1   7.3
## 6 6     F       2.1   7.6

Cause Of Death

head(CauseOfDeath)

## # A tibble: 6 x 7
##    Year LeadingCause       Sex   RaceEthnicity Deaths DeathRate AgeAdjustedDeat~
##   <int> <chr>              <chr> <chr>         <chr>  <chr>     <chr>           
## 1  2010 Influenza (Flu) a~ F     Hispanic      228    18.7      23.1            
## 2  2008 Accidents Except ~ F     Hispanic      68     5.8       6.6             
## 3  2013 Accidents Except ~ M     White Non-Hi~ 271    20.1      17.9            
## 4  2010 Cerebrovascular D~ M     Hispanic      140    12.3      21.4            
## 5  2009 Assault (Homicide~ M     Black Non-Hi~ 255    30        30              
## 6  2012 Mental and Behavi~ F     Other Race/ ~ .      .         .

ServiceTitles

head(ServiceTitles)

## # A tibble: 6 x 10
##   TitleCode Title_Descripti~ StandardHours Assignment_Level UnionCode
##   <chr>     <chr>                    <dbl> <chr>            <chr>    
## 1 00136     BEAUTICIAN                37.5 0                124      
## 2 03647     SPECIAL ASSISTA~          35   0                775      
## 3 03927     ASSISTANT SYSTE~          35   0                129      
## 4 05026     DIRECTOR OF INT~          35   0                775      
## 5 05058     FOOD SERVICE MA~          35   0                121      
## 6 05072     STUDENT LEGAL S~          35   0                19       
## # ... with 5 more variables: Union_Description <chr>,
## #   BargainingUnitShortName <chr>, BargainingUnitDescription <chr>,
## #   MinimumSalaryRate <chr>, MaximumSalaryRate <chr>

Compute cases per year

CauseOfDeath$Year <- as.integer(CauseOfDeath$Year) 
CauseOfDeath$Deaths <- as.integer(CauseOfDeath$Deaths)

## Warning: NAs introduced by coercion

CauseOfDeath %>%  count(Year, wt = Deaths)

## # A tibble: 8 x 2
##    Year     n
## * <int> <int>
## 1  2007 53996
## 2  2008 54138
## 3  2009 52820
## 4  2010 52505
## 5  2011 52726
## 6  2012 52420
## 7  2013 53387
## 8  2014 53006

Cats pivot

cats %>% pivot_wider(names_from = Sex, values_from = c(Bwt,Hwt))

## # A tibble: 144 x 5
##    N     Bwt_F Bwt_M Hwt_F Hwt_M
##    <chr> <dbl> <dbl> <dbl> <dbl>
##  1 1       2      NA   7      NA
##  2 2       2      NA   7.4    NA
##  3 3       2      NA   9.5    NA
##  4 4       2.1    NA   7.2    NA
##  5 5       2.1    NA   7.3    NA
##  6 6       2.1    NA   7.6    NA
##  7 7       2.1    NA   8.1    NA
##  8 8       2.1    NA   8.2    NA
##  9 9       2.1    NA   8.3    NA
## 10 10      2.1    NA   8.5    NA
## # ... with 134 more rows

Unite Minimum Salary Rate and Maximum Salary Rate

ServiceTitles %>% unite(SalaryRate, MinimumSalaryRate, MaximumSalaryRate)

## # A tibble: 3,022 x 9
##    TitleCode Title_Descripti~ StandardHours Assignment_Level UnionCode
##    <chr>     <chr>                    <dbl> <chr>            <chr>    
##  1 00136     BEAUTICIAN                37.5 0                124      
##  2 03647     SPECIAL ASSISTA~          35   0                775      
##  3 03927     ASSISTANT SYSTE~          35   0                129      
##  4 05026     DIRECTOR OF INT~          35   0                775      
##  5 05058     FOOD SERVICE MA~          35   0                121      
##  6 05072     STUDENT LEGAL S~          35   0                19       
##  7 05085     EXECUTIVE DIREC~          35   0                775      
##  8 05103     DIRECTOR (PLANT~          35   0                772      
##  9 05106     ASSISTANT TO TH~          35   0                665      
## 10 05107     SECRETARY TO AS~          35   0                665      
## # ... with 3,012 more rows, and 4 more variables: Union_Description <chr>,
## #   BargainingUnitShortName <chr>, BargainingUnitDescription <chr>,
## #   SalaryRate <chr>

Disconnect from database

dbDisconnect(con)