##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'tidyr' was built under R version 3.1.2
## Loading required package: bitops
## Warning: package 'reshape2' was built under R version 3.1.2
Humanitarian data (from Sam C-D)
#Load the file from local location (the header line is repeated, so some extra work is needed before
# loading the csv)
file <- "/Users/samdevries/Downloads/OP3_DANA_EN.xlsx - DANA.csv"
headers = read.csv(file, skip = 0, header = F, nrows = 1, as.is = T)
df = read.csv(file, skip = 3, header = F)
colnames(df)= headers
df$iso = paste(df$iso2,df$iso3)
#cleaning up the data by focusing on columns dealing with property destruction
destruct <-
df %>%
gather(cat,total,ends_with("_destroyed"),ends_with("_affected"),-iso) %>%
select(iso,cat,total)
## Warning: attributes are not identical across measure variables; they will
## be dropped
destruct_byCat <-
destruct %>%
separate(cat,into=c("cat1","cat2"),sep="_")
#getting a breakdown by region
destruct_byCat %>%
group_by(iso,cat2) %>%
summarise(
total = n()
)
## Source: local data frame [44 x 3]
## Groups: iso
##
## iso cat2 total
## 1 affected 75
## 2 destroyed 123
## 3 AG ATG affected 175
## 4 AG ATG destroyed 287
## 5 AI AIA affected 225
## 6 AI AIA destroyed 369
## 7 AT ATG affected 175
## 8 AT ATG destroyed 287
## 9 BH BHS affected 25
## 10 BH BHS destroyed 41
## 11 BH BHS affected 25
## 12 BH BHS destroyed 41
## 13 BS BHS affected 125
## 14 BS BHS destroyed 205
## 15 CU CUB affected 2475
## 16 CU CUB destroyed 4059
## 17 DM affected 50
## 18 DM destroyed 82
## 19 DM affected 75
## 20 DM destroyed 123
## 21 DO DOM affected 75
## 22 DO DOM destroyed 123
## 23 DO DOM affected 300
## 24 DO DOM destroyed 492
## 25 HT HTI affected 350
## 26 HT HTI destroyed 574
## 27 KN KNA affected 175
## 28 KN KNA destroyed 287
## 29 PR PRI affected 225
## 30 PR PRI destroyed 369
## 31 SX SXM affected 75
## 32 SX SXM destroyed 123
## 33 TC affected 25
## 34 TC destroyed 41
## 35 TC TCA affected 350
## 36 TC TCA destroyed 574
## 37 US USA affected 75
## 38 US USA destroyed 123
## 39 VG VBG affected 50
## 40 VG VBG destroyed 82
## 41 VG VGB affected 325
## 42 VG VGB destroyed 533
## 43 VI VIR affected 75
## 44 VI VIR destroyed 123
#a by region analysis of total numbers of deceased, missing, etc. I chose to order by the number
# in shelters as this might provide a relevant real-world value. Given these numbers, it might be
# helpful for an organization to have numbers like the total number of shelters and capactiy in a region
byIso <- df %>% group_by(iso)
byIso %>%
summarise(
dec = sum(dead_total),
miss = sum(missing_total),
ref = sum(refugees_total),
disp = sum(displaced_total),
shel = sum(in_shelter_total),
aff = sum(affected_total)
) %>%
arrange(desc(shel))
## Source: local data frame [22 x 7]
##
## iso dec miss ref disp shel aff
## 1 CU CUB NA NA NA 122 112 NA
## 2 VG VGB NA NA NA 22 87 NA
## 3 HT HTI NA NA NA 33 81 NA
## 4 DO DOM NA NA NA 50 73 NA
## 5 KN KNA NA NA NA 7 37 NA
## 6 US USA NA NA NA 3 33 NA
## 7 DO DOM NA NA NA 17 32 NA
## 8 AT ATG NA NA NA 17 29 NA
## 9 VI VIR 12 NA NA 3 21 NA
## 10 PR PRI NA NA NA 9 17 NA
## 11 TC TCA NA NA NA 14 14 NA
## 12 AI AIA NA NA NA 9 9 NA
## 13 BS BHS NA NA NA 8 9 NA
## 14 AG ATG NA NA NA 13 7 NA
## 15 NA NA NA 3 3 NA
## 16 DM NA NA NA 3 3 NA
## 17 SX SXM NA NA NA 3 3 NA
## 18 DM NA NA NA 2 2 NA
## 19 VG VBG NA NA NA 2 2 NA
## 20 BH BHS NA NA NA 1 1 NA
## 21 BH BHS NA NA NA 1 1 NA
## 22 TC NA NA NA 1 1 NA
Drug use by age (from Michael D)
x <- getURL("https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv")
y <- read.csv(text = x)
use <-
y %>%
gather(cat,val,ends_with(".use"),-age, -n) %>%
select(age,n,cat,val)
freq <-
y %>%
gather(cat,val,ends_with("frequency"),-age, -n) %>%
select(age,n,cat,val)
## Warning: attributes are not identical across measure variables; they will
## be dropped
#see the most commonly used substances
use %>%
group_by(age) %>%
filter(val==max(val)) %>%
arrange(age)
## Source: local data frame [17 x 4]
## Groups: age
##
## age n cat val
## 1 12 2798 alcohol.use 3.9
## 2 13 2757 alcohol.use 8.5
## 3 14 2792 alcohol.use 18.1
## 4 15 2956 alcohol.use 29.2
## 5 16 3058 alcohol.use 40.1
## 6 17 3038 alcohol.use 49.3
## 7 18 2469 alcohol.use 58.7
## 8 19 2223 alcohol.use 64.6
## 9 20 2271 alcohol.use 69.7
## 10 21 2354 alcohol.use 83.2
## 11 22-23 4707 alcohol.use 84.2
## 12 24-25 4591 alcohol.use 83.1
## 13 26-29 2628 alcohol.use 80.7
## 14 30-34 2864 alcohol.use 77.5
## 15 35-49 7391 alcohol.use 75.0
## 16 50-64 3923 alcohol.use 67.2
## 17 65+ 2448 alcohol.use 49.3
#see the most frequently used substances
freq %>%
group_by(age) %>%
filter(val==max(val)) %>%
arrange(age)
## Source: local data frame [20 x 4]
## Groups: age
##
## age n cat val
## 1 12 2798 hallucinogen.frequency 52
## 2 12 2798 tranquilizer.frequency 52
## 3 13 2757 alcohol.frequency 6
## 4 13 2757 hallucinogen.frequency 6
## 5 14 2792 cocaine.frequency 5.5
## 6 15 2956 crack.frequency 9.5
## 7 16 3058 stimulant.frequency 9.5
## 8 17 3038 pain.releiver.frequency 9
## 9 17 3038 stimulant.frequency 9
## 10 18 2469 stimulant.frequency 8
## 11 19 2223 oxycontin.frequency 7.5
## 12 20 2271 cocaine.frequency 8.0
## 13 21 2354 sedative.frequency 9
## 14 22-23 4707 heroin.frequency 57.5
## 15 24-25 4591 heroin.frequency 88.0
## 16 26-29 2628 stimulant.frequency 7
## 17 30-34 2864 cocaine.frequency 8.0
## 18 35-49 7391 tranquilizer.frequency 6
## 19 50-64 3923 crack.frequency 62.0
## 20 65+ 2448 alcohol.frequency 52
#performing the analysis from the discussion question (using the age as columns)
dcast(use, cat ~ age)
## Using val as value column: use value.var to override.
## cat 12 13 14 15 16 17 18 19 20 21 22-23
## 1 alcohol.use 3.9 8.5 18.1 29.2 40.1 49.3 58.7 64.6 69.7 83.2 84.2
## 2 marijuana.use 1.1 3.4 8.7 14.5 22.5 28.0 33.7 33.4 34.0 33.0 28.4
## 3 cocaine.use 0.1 0.1 0.1 0.5 1.0 2.0 3.2 4.1 4.9 4.8 4.5
## 4 crack.use 0.0 0.0 0.0 0.1 0.0 0.1 0.4 0.5 0.6 0.5 0.5
## 5 heroin.use 0.1 0.0 0.1 0.2 0.1 0.1 0.4 0.5 0.9 0.6 1.1
## 6 hallucinogen.use 0.2 0.6 1.6 2.1 3.4 4.8 7.0 8.6 7.4 6.3 5.2
## 7 inhalant.use 1.6 2.5 2.6 2.5 3.0 2.0 1.8 1.4 1.5 1.4 1.0
## 8 pain.releiver.use 2.0 2.4 3.9 5.5 6.2 8.5 9.2 9.4 10.0 9.0 10.0
## 9 oxycontin.use 0.1 0.1 0.4 0.8 1.1 1.4 1.7 1.5 1.7 1.3 1.7
## 10 tranquilizer.use 0.2 0.3 0.9 2.0 2.4 3.5 4.9 4.2 5.4 3.9 4.4
## 11 stimulant.use 0.2 0.3 0.8 1.5 1.8 2.8 3.0 3.3 4.0 4.1 3.6
## 12 meth.use 0.0 0.1 0.1 0.3 0.3 0.6 0.5 0.4 0.9 0.6 0.6
## 13 sedative.use 0.2 0.1 0.2 0.4 0.2 0.5 0.4 0.3 0.5 0.3 0.2
## 24-25 26-29 30-34 35-49 50-64 65+
## 1 83.1 80.7 77.5 75.0 67.2 49.3
## 2 24.9 20.8 16.4 10.4 7.3 1.2
## 3 4.0 3.2 2.1 1.5 0.9 0.0
## 4 0.5 0.4 0.5 0.5 0.4 0.0
## 5 0.7 0.6 0.4 0.1 0.1 0.0
## 6 4.5 3.2 1.8 0.6 0.3 0.1
## 7 0.8 0.6 0.4 0.3 0.2 0.0
## 8 9.0 8.3 5.9 4.2 2.5 0.6
## 9 1.3 1.2 0.9 0.3 0.4 0.0
## 10 4.3 4.2 3.6 1.9 1.4 0.2
## 11 2.6 2.3 1.4 0.6 0.3 0.0
## 12 0.7 0.6 0.4 0.2 0.2 0.0
## 13 0.2 0.4 0.4 0.3 0.2 0.0
College Scorecard (from Joshua S.)
x2 <- getURL("https://ed-public-download.app.cloud.gov/downloads/Most-Recent-Cohorts-Scorecard-Elements.csv")
y2 <- read.csv(text = x2,na.strings=c("NULL"))
#combining similar columns; the columns beginning with UGDS refer to ethnicity breakdowns
df <- y2 %>%
gather(cat,val,starts_with("UGDS"), -INSTNM, -STABBR) %>%
select(INSTNM,STABBR,cat,val) %>%
#removing the "total undergrads" fields
filter(cat != "UGDS") %>%
separate(cat,into=c("student.type","ethnicity"),sep="_")
#analysis of ethnicity by state, showing institutions with the highest concentrations of each category
df %>%
group_by(ethnicity,STABBR) %>%
filter(val == max(val)) %>%
arrange(STABBR)
## Source: local data frame [81 x 5]
## Groups: ethnicity, STABBR
##
## INSTNM STABBR
## 1 Alaska Bible College AK
## 2 Alaska Career College AK
## 3 Alaska Career College AK
## 4 Alaska Career College AK
## 5 Alaska Christian College AK
## 6 Ilisagvik College AK
## 7 University of Alaska Anchorage AK
## 8 Ilisagvik College AK
## 9 University of Alaska Fairbanks AK
## 10 American Samoa Community College AS
## 11 American Samoa Community College AS
## 12 American Samoa Community College AS
## 13 American Samoa Community College AS
## 14 American Samoa Community College AS
## 15 American Samoa Community College AS
## 16 American Samoa Community College AS
## 17 American Samoa Community College AS
## 18 American Samoa Community College AS
## 19 College of Micronesia-FSM FM
## 20 College of Micronesia-FSM FM
## 21 College of Micronesia-FSM FM
## 22 College of Micronesia-FSM FM
## 23 College of Micronesia-FSM FM
## 24 College of Micronesia-FSM FM
## 25 College of Micronesia-FSM FM
## 26 College of Micronesia-FSM FM
## 27 College of Micronesia-FSM FM
## 28 Pacific Islands University GU
## 29 Pacific Islands University GU
## 30 University of Guam GU
## 31 University of Guam GU
## 32 University of Guam GU
## 33 Pacific Islands University GU
## 34 Guam Community College GU
## 35 University of Guam GU
## 36 University of Guam GU
## 37 Mr Leon's School of Hair Design-Lewiston ID
## 38 Boise Barber College ID
## 39 Milan Institute-Nampa ID
## 40 Carrington College-Boise ID
## 41 Headmasters School of Hair Design ID
## 42 Mr Leon's School of Hair Design-Moscow ID
## 43 Boise Barber College ID
## 44 Idaho State University ID
## 45 University of Phoenix-Idaho ID
## 46 College of the Marshall Islands MH
## 47 College of the Marshall Islands MH
## 48 College of the Marshall Islands MH
## 49 College of the Marshall Islands MH
## 50 College of the Marshall Islands MH
## 51 College of the Marshall Islands MH
## 52 College of the Marshall Islands MH
## 53 College of the Marshall Islands MH
## 54 College of the Marshall Islands MH
## 55 Northern Marianas College MP
## 56 Northern Marianas College MP
## 57 Northern Marianas College MP
## 58 Northern Marianas College MP
## 59 Northern Marianas College MP
## 60 Northern Marianas College MP
## 61 Northern Marianas College MP
## 62 Northern Marianas College MP
## 63 Northern Marianas College MP
## 64 Palau Community College PW
## 65 Palau Community College PW
## 66 Palau Community College PW
## 67 Palau Community College PW
## 68 Palau Community College PW
## 69 Palau Community College PW
## 70 Palau Community College PW
## 71 Palau Community College PW
## 72 Palau Community College PW
## 73 Casper College WY
## 74 CollegeAmerica-Cheyenne WY
## 75 Cheeks International Academy of Beauty Culture-Cheyenne WY
## 76 Cheeks International Academy of Beauty Culture-Cheyenne WY
## 77 Central Wyoming College WY
## 78 CollegeAmerica-Cheyenne WY
## 79 Wyotech-Laramie WY
## 80 Western Wyoming Community College WY
## 81 Wyotech-Laramie WY
## Variables not shown: student.type (chr), ethnicity (chr), val (dbl)