-SAS University Edition/ SAS Studio
-https://support.sas.com/en/knowledge-base.html
-http://support.sas.com/kb/40/700.html
-Download the following csv files from the gapminder web site (https://www.gapminder.org).
cholesterol_fat_in_blood_women_mmolperl.csv
body_mass_index_bmi_women_kgperm2.csv
blood_pressure_sbp_women_mmhg.csv
children_per_woman_total_fertility.csv
contraceptive_use_percent_of_women_ages_15_49.csv
breast_cancer_number_of_female_deaths.csv
infant_mortality_rate_per_1000_births.csv
maternal_deaths_total_number.csv
maternal_mortality_ratio_per_100000_live_births.csv
-Some numeric variables will be presented as character variables following importation.
-They needed to be conveted to the appropriate variable types.
-It could be achived in many ways: e.g., group the numeric and character variables into two datasets, convert the char variables using input function and concatenate/merge both datasets. In another way, determine which variables are char and use an array to convert them into num variables.
-The example provided at http://support.sas.com/kb/40/700.html can be used to agnostically to process variable conversion. It would be helpful when there are many variables and it’s tedious to identify individual char variables.
-The codes in that example would convert the character variables in the test dataset to numeric variables in the output dataset test2.
-Use these codes as it is from proc contents onward but substitute country for id and your dataset name for test.
%let path=/folders/myshortcuts/datasets;
libname usr "&path";
-Some variables would be imported as character variables instead of numeric variables as would be normally expected.
-These could be converted back to numeric as described above. However, it would be done following the array processing as shown below.
Output: Total rows: 189 and Total columns: 30
proc import datafile="&path/cholesterol_fat_in_blood_women_mmolperl.csv" dbms=csv out=usr.fm_tc replace;
getnames=yes;
run;
Output: Total rows: 189 and Total columns: 30
proc import datafile="&path/body_mass_index_bmi_women_kgperm2.csv" dbms=csv out=usr.fm_bmi replace;
getnames=yes;
run;
Output: Total rows: 189 and Total columns: 30
proc import datafile="&path/blood_pressure_sbp_women_mmhg.csv" dbms=csv out=usr.fm_sbp replace;
getnames=yes;
run;
Output: Total rows: 184 and Total columns: 220
proc import datafile="&path/children_per_woman_total_fertility.csv" dbms=csv out=usr.fm_fertility replace;
getnames=yes;
run;
output: Total rows: 187 and Total columns: 28
proc import datafile="&path/breast_cancer_number_of_female_deaths.csv" dbms=csv out=usr.fm_brstc_death replace;
getnames=yes;
run;
Output: Total rows: 185 and Total columns: 58.
Many missing values in this dataset.
*Some numeric variables are imported as character variables and needed variable conversion.
proc import datafile="&path/contraceptive_use_percent_of_women_ages_15_49.csv" dbms=csv out=usr.fm_contracept_use replace;
getnames=yes;
run;
proc contents data=USR.FM_CONTRACEPT_USE out=vars(keep=name type);
run;
data vars;
set vars;
if type=2 and name ne 'country';
newname=trim(left(name))||"_n";
run;
options symbolgen;
proc sql;
select trim(left(name)), trim(left(newname)),
trim(left(newname))||'='||trim(left(name))
into :c_list separated by ' ', :n_list separated by ' ',
:renam_list separated by ' '
from vars;
quit;
data usr.FM_CONTRACEPT_USE_num;
set USR.FM_CONTRACEPT_USE;
array ch(*) $ &c_list;
array nu(*) &n_list;
do i = 1 to dim(ch);
nu(i)=input(ch(i),8.);
end;
drop i &c_list;
rename &renam_list;
run;
Output: Total rows: 194 and Total columns: 217.
Many missing values in this dataset.
*Some numeric variables are imported as character variables and need conversion to numeric ones.
proc import datafile="&path/infant_mortality_rate_per_1000_births.csv" dbms=csv out=usr.infant_mortality replace;
getnames=yes;
run;
proc contents data=USR.infant_mortality out=vars(keep=name type);
run;
data vars;
set vars;
if type=2 and name ne 'country';
newname=trim(left(name))||"_n";
run;
options symbolgen;
proc sql;
select trim(left(name)), trim(left(newname)),
trim(left(newname))||'='||trim(left(name))
into :c_list separated by ' ', :n_list separated by ' ',
:renam_list separated by ' '
from vars;
quit;
data usr.infant_mortality_num;
set USR.infant_mortality;
array ch(*) $ &c_list;
array nu(*) &n_list;
do i = 1 to dim(ch);
nu(i)=input(ch(i),8.);
end;
drop i &c_list;
rename &renam_list;
run;
include_graphics("infant_mortality_num.PNG")
Output: Total rows: 183 and Total columns: 215.
Many missing valus in this dataset.
*Some numeric variables are imported as character variables.
proc import datafile="&path/maternal_deaths_total_number.csv" dbms=csv out=usr.maternal_death replace;
getnames=yes;
run;
proc contents data=usr.maternal_death out=vars(keep=name type);
run;
data vars;
set vars;
if type=2 and name ne 'country';
newname=trim(left(name))||"_n";
run;
options symbolgen;
proc sql;
select trim(left(name)), trim(left(newname)),
trim(left(newname))||'='||trim(left(name))
into :c_list separated by ' ', :n_list separated by ' ',
:renam_list separated by ' '
from vars;
quit;
data usr.maternal_death_num;
set usr.maternal_death;
array ch(*) $ &c_list;
array nu(*) &n_list;
do i = 1 to dim(ch);
nu(i)=input(ch(i),8.);
end;
drop i &c_list;
rename &renam_list;
run;
include_graphics("maternal_death_num.PNG")
Output: Total rows: 187 and Total columns: 215
Many missing valus in this dataset.
*Some numeric variables are imported as character variables.
proc import datafile="&path/maternal_mortality_ratio_per_100000_live_births.csv" dbms=csv out=usr.maternal_mortality_ratio replace;
getnames=yes;
run;
proc contents data=usr.maternal_mortality_ratio out=vars(keep=name type);
run;
data vars;
set vars;
if type=2 and name ne 'country';
newname=trim(left(name))||"_n";
run;
options symbolgen;
proc sql;
select trim(left(name)), trim(left(newname)),
trim(left(newname))||'='||trim(left(name))
into :c_list separated by ' ', :n_list separated by ' ',
:renam_list separated by ' '
from vars;
quit;
data usr.maternal_mortality_ratio_num;
set usr.maternal_mortality_ratio;
array ch(*) $ &c_list;
array nu(*) &n_list;
do i = 1 to dim(ch);
nu(i)=input(ch(i),8.);
end;
drop i &c_list;
rename &renam_list;
run;
include_graphics("maternal_mortality_ratio_num.PNG")
array statementsOutput: Total rows: 5481 and Total columns: 3
data usr.fm_tc_long;
set usr.fm_tc;
array widetolong{1980:2008} _1980-_2008;
do year = 1980 to 2008;
tc = widetolong(year);
output;
end;
drop _1980-_2008;
run;
Output: Total rows: 5481 and Total columns: 3
data usr.fm_bmi_long;
set usr.fm_bmi;
array widetolong{1980:2008} _1980-_2008;
do year = 1980 to 2008;
bmi = widetolong(year);
output;
end;
drop _1980-_2008;
run;
Output: Total rows: 5481 and Total columns: 3
data usr.fm_brstc_death_long;
set usr.fm_brstc_death;
array widetolong{1990:2016} _1990-_2016;
do year = 1990 to 2016;
breast_cancer_deaths = widetolong(year);
output;
end;
drop _1990-_2016;
run;
Output: Total rows: 40296 and Total columns: 3
data usr.fm_fertility_long;
set usr.fm_fertility;
array widetolong{1800:2018} _1800-_2018;
do year = 1800 to 2008;
total_fertility = widetolong(year);
output;
end;
drop _1800-_2018;
run;
Output: Total rows: 5049 and Total columns: 3
data usr.fm_brstc_death_long;
set usr.fm_brstc_death;
array widetolong{1990:2016} _1990-_2016;
do year = 1990 to 2016;
breast_cancer_deaths = widetolong(year);
output;
end;
drop _1990-_2016;
run;
Some numeric variables were imported as character variables.
After transposition all variables are character in nature.
Hence they needed approprite conversion.
Output: Total rows: 10545 and Total columns: 3.
proc transpose data=usr.fm_contracept_use out=usr.fm_contracept_use_t;
by country;
var _1961-_2017;
run;
Output: Total rows: 41904 and Total columns: 3
data usr.infant_mortality_long;
set usr.infant_mortality_num;
array widetolong{1800:2015} _1800-_2015;
do year = 1800 to 2015;
infant_mortality_rate = widetolong(year);
output;
end;
drop _1800-_2015;
run;
Output: Total rows: 39162 and Total columns: 3
data usr.maternal_death_long;
set usr.maternal_death_num;
array widetolong{1800:2013} _1800-_2013;
do year = 1800 to 2013;
maternal_deaths = widetolong(year);
output;
end;
drop _1800-_2013;
run;
Output: Total rows: 40018 and Total columns: 3
data usr.maternal_mortality_ratio_long;
set usr.maternal_mortality_ratio_num;
array widetolong{1800:2013} _1800-_2013;
do year = 1800 to 2013;
maternal_mortality_ratio = widetolong(year);
output;
end;
drop _1800-_2013;
run;
where clauseOutput: Total rows: 3382 and Total columns: 11
proc sql;
create table usr.gpm_a as
select
t.country, t.year, tc,
bmi,
sbp,
total_fertility,
breast_cancer_deaths,
contraceptive_use_percent,
infant_mortality_rate,
maternal_deaths,
maternal_mortality_ratio
from
usr.fm_tc_long as t,
usr.fm_bmi_long as b,
usr.fm_sbp_long as s,
usr.fm_fertility_long as f,
usr.fm_brstc_death_long as br,
usr.fm_contracept_use_long as c,
usr.infant_mortality_long as i,
usr.maternal_death_long as m,
usr.maternal_mortality_ratio_long as mm
where
t.country=b.country=s.country=f.country=br.country=c.country=i.country=m.country=mm.country
and
t.year=b.year=s.year=f.year=br.year=c.year=i.year=m.year=mm.year;
quit;
Output: Total rows: 3382 and Total columns: 8
proc sql;
create table usr.gpm_a_nomiss as
select country, year, tc, bmi, sbp, total_fertility, breast_cancer_deaths, infant_mortality_rate from usr.gpm
where country and year and tc and bmi and sbp and total_fertility and breast_cancer_deaths and infant_mortality_rate is not missing;
quit;
Output: Total rows: 3382 and Total columns: 11
proc sql;
create table usr.gpm_a_i as
select
t.country, t.year, tc,
bmi,
sbp,
total_fertility,
breast_cancer_deaths,
contraceptive_use_percent,
infant_mortality_rate,
maternal_deaths,
maternal_mortality_ratio
from
usr.fm_tc_long as t
inner join usr.fm_bmi_long as b
on (t.country=b.country and t.year=b.year)
inner join usr.fm_sbp_long as s
on (t.country=s.country and t.year=s.year)
inner join usr.fm_fertility_long as f
on (t.country=f.country and t.year=f.year)
inner join usr.fm_brstc_death_long as br
on (t.country=br.country and t.year=br.year)
inner join usr.fm_contracept_use_long as c
on (t.country=c.country and t.year=c.year)
inner join usr.infant_mortality_long as i
on (t.country=i.country and t.year=i.year)
inner join usr.maternal_death_long as m
on (t.country=m.country and t.year=m.year)
inner join usr.maternal_mortality_ratio_long as mm
on (t.country=mm.country and t.year=mm.year);
quit;
Output: Total rows: 3477 and Total columns: 8
proc sql;
create table usr.gpm_a_i_1 as
select
t.country, t.year, tc,
bmi,
sbp,
total_fertility,
breast_cancer_deaths,
infant_mortality_rate
from
usr.fm_tc_long as t
inner join usr.fm_bmi_long as b
on (t.country=b.country and t.year=b.year)
inner join usr.fm_sbp_long as s
on (t.country=s.country and t.year=s.year)
inner join usr.fm_fertility_long as f
on (t.country=f.country and t.year=f.year)
inner join usr.fm_brstc_death_long as br
on (t.country=br.country and t.year=br.year)
inner join usr.infant_mortality_long as i
on (t.country=i.country and t.year=i.year);
quit;
Output: Total rows: 5481 and Total columns: 8
proc sql;
create table usr.gpm_a_l as
select
t.country, t.year, tc, bmi, sbp, total_fertility, breast_cancer_deaths, infant_mortality_rate
from
usr.fm_tc_long as t
left join usr.fm_bmi_long as b
on (t.country=b.country and t.year=b.year)
left join usr.fm_sbp_long as s
on (t.country=s.country and t.year=s.year)
left join usr.fm_fertility_long as f
on (t.country=f.country and t.year=f.year)
left join usr.fm_brstc_death_long as br
on (t.country=br.country and t.year=br.year)
left join usr.infant_mortality_long as i
on (t.country=i.country and t.year=i.year);
quit;
Output: Total rows: 41904 and Total columns: 8
proc sql;
create table usr.gpm_a_r as
select
t.country, t.year, tc, bmi, sbp, total_fertility, breast_cancer_deaths, infant_mortality_rate
from
usr.fm_tc_long as t
right join usr.fm_bmi_long as b
on (t.country=b.country and t.year=b.year)
right join usr.fm_sbp_long as s
on (t.country=s.country and t.year=s.year)
right join usr.fm_fertility_long as f
on (t.country=f.country and t.year=f.year)
right join usr.fm_brstc_death_long as br
on (t.country=br.country and t.year=br.year)
right join usr.infant_mortality_long as i
on (t.country=i.country and t.year=i.year);
quit;
Output: Total rows: 3477 and Total columns: 8
proc sql;
create table usr.gpm_a_r_nomiss as
select * from usr.gpm_a_r
where country and year and tc and bmi and sbp and total_fertility and breast_cancer_deaths and infant_mortality_rate is not missing;
quit;
Output: Total rows: 76568 and Total columns: 8
proc sql;
create table usr.gpm_a_f as
select
t.country, t.year, tc, bmi, sbp, total_fertility, breast_cancer_deaths, infant_mortality_rate
from
usr.fm_tc_long as t
full join usr.fm_bmi_long as b
on (t.country=b.country and t.year=b.year)
full join usr.fm_sbp_long as s
on (t.country=s.country and t.year=s.year)
full join usr.fm_fertility_long as f
on (t.country=f.country and t.year=f.year)
full join usr.fm_brstc_death_long as br
on (t.country=br.country and t.year=br.year)
full join usr.infant_mortality_long as i
on (t.country=i.country and t.year=i.year);
quit;
Output: Total rows: 3477 and Total columns: 8
proc sql;
create table usr.gpm_a_f_nomiss as
select *
from usr.gpm_a_f
where country and year and tc and bmi and sbp and total_fertility and breast_cancer_deaths and infant_mortality_rate is not missing;
quit;