I am using DBI library for database access and tidyverse library for data manipulation.
library(DBI)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
I will create ODBC connection object
con <- dbConnect(odbc::odbc(),
.connection_string = "Driver={SQL Server};Server=(local);Database=tb;Trusted_Connection=Yes;")
144 adult (over 2kg in weight) cats used for experiments with the drug digitalis had their heart and body weight recorded. 47 of the cats were female and 97 were male. https://stat.ethz.ch/R-manual/R-devel/library/boot/html/catsM.html This data frame contains the following columns:
Sex - A factor for the sex of the cat (levels are F and M: all cases are M in this subset). Bwt - Body weight in kg. Hwt - Heart weight in g.
Source The data were obtained from Fisher, R.A. (1947) The analysis of covariance method for the relation between a part and the whole. Biometrics, 3, 65–68.
Cause of death is derived from the NYC death certificate which is issued for every death that occurs in New York City. https://data.cityofnewyork.us/Health/New-York-City-Leading-Causes-of-Death/jb7j-dtam
###Columns in this Dataset: Year - The year of death. Leading Cause - The cause of death. Sex - The decedent’s sex. Race Ethnicity - The decedent’s ethnicity. Deaths - The number of people who died due to cause of death. Death Rate - The death rate within the sex and Race/ethnicity category Age Adjusted Death Rate - The age-adjusted death rate within the sex and Race/ethnicity category
List of Civil Service title codes and title descriptions used by City of New York agencies https://data.cityofnewyork.us/City-Government/NYC-Civil-Service-Titles/nzjr-3966
###Columns in this Dataset: Title Code - Civil Service Title Code Title Description - Name/Description of title Standard Hours - Standard weekly hours for the title Assignment Level - Assignment level within title Union Code - Unique code for union representing title, if applicable Union Description - Name/Description of union representing title, if applicable Bargaining Unit Short Name - Acronym or short name for union designated as the bargaining unit for title, if applicable Bargaining Unit Description - Name/description of union designated as the bargaining unit for title, if applicable Minimum Salary Rate - Minimum salary range for the title Maximum Salary Rate - Maximum salary range for the title
rs = dbSendStatement(con, "
--DROP TABLE IF EXISTS dbo.cats;
--CREATE TABLE dbo.cats(N VARCHAR(3), Sex VARCHAR(3), Bwt DECIMAL(3,1), Hwt DECIMAL(4,2));
--DROP TABLE IF EXISTS dbo.CauseOfDeath;
--CREATE TABLE dbo.CauseOfDeath (
--[Year] INT NOT NULL,
--LeadingCause VARCHAR(130) NOT NULL,
--Sex VARCHAR(6),
--RaceEthnicity VARCHAR(40),
--Deaths VARCHAR(40),
--DeathRate VARCHAR(40),
--AgeAdjustedDeathRate VARCHAR(40));
--DROP TABLE IF EXISTS dbo.ServiceTitles;
--CREATE TABLE dbo.ServiceTitles (
--TitleCode CHAR(5) NOT NULL,
--Title_Description VARCHAR(70) NOT NULL,
--StandardHours FLOAT NOT NULL,
--Assignment_Level VARCHAR(50),
--UnionCode VARCHAR(20),
--Union_Description VARCHAR(70),
--BargainingUnitShortName VARCHAR(5),
--BargainingUnitDescription VARCHAR(70),
--MinimumSalaryRate VARCHAR(20),
--MaximumSalaryRate VARCHAR(20));
"
)
dbClearResult(rs)
rs = dbSendStatement(con, "
-- BULK INSERT dbo.cats FROM 'cats.csv'
-- BULK INSERT dbo.ServiceTitles FROM 'NYC_Civil_Service_Titles.csv'
-- BULK INSERT dbo.CauseOfDeath FROM 'New_York_City_Leading_Causes_of_Death.csv' WITH (FORMAT = 'CSV');
")
dbClearResult(rs)
rs <- dbSendQuery(con, "
SELECT N
,Sex
,Bwt
,Hwt
FROM dbo.cats
")
df_cat = data.frame(dbFetch(rs))
dbClearResult(rs)
rs <- dbSendQuery(con, "
SELECT Year
,LeadingCause
,Sex
,RaceEthnicity
,Deaths
,DeathRate
,AgeAdjustedDeathRate
FROM dbo.CauseOfDeath2
")
df_c_of_d = data.frame(dbFetch(rs))
dbClearResult(rs)
rs <- dbSendQuery(con, "
SELECT TitleCode
,Title_Description
,StandardHours
,Assignment_Level
,UnionCode
,Union_Description
,BargainingUnitShortName
,BargainingUnitDescription
,MinimumSalaryRate
,MaximumSalaryRate
FROM dbo.ServiceTitles
")
df_tc = data.frame(dbFetch(rs))
dbClearResult(rs)
cats = tibble(df_cat)
CauseOfDeath = tibble(df_c_of_d)
ServiceTitles = tibble(df_tc)
head(cats)
## # A tibble: 6 x 4
## N Sex Bwt Hwt
## <chr> <chr> <dbl> <dbl>
## 1 1 F 2 7
## 2 2 F 2 7.4
## 3 3 F 2 9.5
## 4 4 F 2.1 7.2
## 5 5 F 2.1 7.3
## 6 6 F 2.1 7.6
head(CauseOfDeath)
## # A tibble: 6 x 7
## Year LeadingCause Sex RaceEthnicity Deaths DeathRate AgeAdjustedDeat~
## <int> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2010 Influenza (Flu) a~ F Hispanic 228 18.7 23.1
## 2 2008 Accidents Except ~ F Hispanic 68 5.8 6.6
## 3 2013 Accidents Except ~ M White Non-Hi~ 271 20.1 17.9
## 4 2010 Cerebrovascular D~ M Hispanic 140 12.3 21.4
## 5 2009 Assault (Homicide~ M Black Non-Hi~ 255 30 30
## 6 2012 Mental and Behavi~ F Other Race/ ~ . . .
head(ServiceTitles)
## # A tibble: 6 x 10
## TitleCode Title_Descripti~ StandardHours Assignment_Level UnionCode
## <chr> <chr> <dbl> <chr> <chr>
## 1 00136 BEAUTICIAN 37.5 0 124
## 2 03647 SPECIAL ASSISTA~ 35 0 775
## 3 03927 ASSISTANT SYSTE~ 35 0 129
## 4 05026 DIRECTOR OF INT~ 35 0 775
## 5 05058 FOOD SERVICE MA~ 35 0 121
## 6 05072 STUDENT LEGAL S~ 35 0 19
## # ... with 5 more variables: Union_Description <chr>,
## # BargainingUnitShortName <chr>, BargainingUnitDescription <chr>,
## # MinimumSalaryRate <chr>, MaximumSalaryRate <chr>
CauseOfDeath$Year <- as.integer(CauseOfDeath$Year)
CauseOfDeath$Deaths <- as.integer(CauseOfDeath$Deaths)
## Warning: NAs introduced by coercion
CauseOfDeath %>% count(Year, wt = Deaths)
## # A tibble: 8 x 2
## Year n
## * <int> <int>
## 1 2007 53996
## 2 2008 54138
## 3 2009 52820
## 4 2010 52505
## 5 2011 52726
## 6 2012 52420
## 7 2013 53387
## 8 2014 53006
cats %>% pivot_wider(names_from = Sex, values_from = c(Bwt,Hwt))
## # A tibble: 144 x 5
## N Bwt_F Bwt_M Hwt_F Hwt_M
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 2 NA 7 NA
## 2 2 2 NA 7.4 NA
## 3 3 2 NA 9.5 NA
## 4 4 2.1 NA 7.2 NA
## 5 5 2.1 NA 7.3 NA
## 6 6 2.1 NA 7.6 NA
## 7 7 2.1 NA 8.1 NA
## 8 8 2.1 NA 8.2 NA
## 9 9 2.1 NA 8.3 NA
## 10 10 2.1 NA 8.5 NA
## # ... with 134 more rows
ServiceTitles %>% unite(SalaryRate, MinimumSalaryRate, MaximumSalaryRate)
## # A tibble: 3,022 x 9
## TitleCode Title_Descripti~ StandardHours Assignment_Level UnionCode
## <chr> <chr> <dbl> <chr> <chr>
## 1 00136 BEAUTICIAN 37.5 0 124
## 2 03647 SPECIAL ASSISTA~ 35 0 775
## 3 03927 ASSISTANT SYSTE~ 35 0 129
## 4 05026 DIRECTOR OF INT~ 35 0 775
## 5 05058 FOOD SERVICE MA~ 35 0 121
## 6 05072 STUDENT LEGAL S~ 35 0 19
## 7 05085 EXECUTIVE DIREC~ 35 0 775
## 8 05103 DIRECTOR (PLANT~ 35 0 772
## 9 05106 ASSISTANT TO TH~ 35 0 665
## 10 05107 SECRETARY TO AS~ 35 0 665
## # ... with 3,012 more rows, and 4 more variables: Union_Description <chr>,
## # BargainingUnitShortName <chr>, BargainingUnitDescription <chr>,
## # SalaryRate <chr>
dbDisconnect(con)