## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'tidyr' was built under R version 3.1.2
## Loading required package: bitops
## Warning: package 'reshape2' was built under R version 3.1.2

Humanitarian data (from Sam C-D)

#Load the file from local location (the header line is repeated, so some extra work is needed before
# loading the csv)

file <- "/Users/samdevries/Downloads/OP3_DANA_EN.xlsx - DANA.csv"
headers = read.csv(file, skip = 0, header = F, nrows = 1, as.is = T)
df = read.csv(file, skip = 3, header = F)
colnames(df)= headers

df$iso = paste(df$iso2,df$iso3)

#cleaning up the data by focusing on columns dealing with property destruction
destruct <-
df %>%
  gather(cat,total,ends_with("_destroyed"),ends_with("_affected"),-iso) %>%
  select(iso,cat,total)
## Warning: attributes are not identical across measure variables; they will
## be dropped
destruct_byCat <-
  destruct %>%
  separate(cat,into=c("cat1","cat2"),sep="_")

#getting a breakdown by region
destruct_byCat %>%
  group_by(iso,cat2) %>%
  summarise(
    total = n()
    )
## Source: local data frame [44 x 3]
## Groups: iso
## 
##        iso      cat2 total
## 1           affected    75
## 2          destroyed   123
## 3   AG ATG  affected   175
## 4   AG ATG destroyed   287
## 5   AI AIA  affected   225
## 6   AI AIA destroyed   369
## 7   AT ATG  affected   175
## 8   AT ATG destroyed   287
## 9  BH  BHS  affected    25
## 10 BH  BHS destroyed    41
## 11  BH BHS  affected    25
## 12  BH BHS destroyed    41
## 13  BS BHS  affected   125
## 14  BS BHS destroyed   205
## 15  CU CUB  affected  2475
## 16  CU CUB destroyed  4059
## 17     DM   affected    50
## 18     DM  destroyed    82
## 19    DM    affected    75
## 20    DM   destroyed   123
## 21 DO  DOM  affected    75
## 22 DO  DOM destroyed   123
## 23  DO DOM  affected   300
## 24  DO DOM destroyed   492
## 25  HT HTI  affected   350
## 26  HT HTI destroyed   574
## 27  KN KNA  affected   175
## 28  KN KNA destroyed   287
## 29  PR PRI  affected   225
## 30  PR PRI destroyed   369
## 31  SX SXM  affected    75
## 32  SX SXM destroyed   123
## 33     TC   affected    25
## 34     TC  destroyed    41
## 35  TC TCA  affected   350
## 36  TC TCA destroyed   574
## 37  US USA  affected    75
## 38  US USA destroyed   123
## 39  VG VBG  affected    50
## 40  VG VBG destroyed    82
## 41  VG VGB  affected   325
## 42  VG VGB destroyed   533
## 43  VI VIR  affected    75
## 44  VI VIR destroyed   123
#a by region analysis of total numbers of deceased, missing, etc. I chose to order by the number 
# in shelters as this might provide a relevant real-world value. Given these numbers, it might be
# helpful for an organization to have numbers like the total number of shelters and capactiy in a region
byIso <- df %>% group_by(iso) 
byIso %>% 
  summarise(
  dec = sum(dead_total),
  miss = sum(missing_total),
  ref = sum(refugees_total),
  disp = sum(displaced_total),
  shel = sum(in_shelter_total),
  aff = sum(affected_total)
  ) %>%
  arrange(desc(shel))
## Source: local data frame [22 x 7]
## 
##        iso dec miss ref disp shel aff
## 1   CU CUB  NA   NA  NA  122  112  NA
## 2   VG VGB  NA   NA  NA   22   87  NA
## 3   HT HTI  NA   NA  NA   33   81  NA
## 4   DO DOM  NA   NA  NA   50   73  NA
## 5   KN KNA  NA   NA  NA    7   37  NA
## 6   US USA  NA   NA  NA    3   33  NA
## 7  DO  DOM  NA   NA  NA   17   32  NA
## 8   AT ATG  NA   NA  NA   17   29  NA
## 9   VI VIR  12   NA  NA    3   21  NA
## 10  PR PRI  NA   NA  NA    9   17  NA
## 11  TC TCA  NA   NA  NA   14   14  NA
## 12  AI AIA  NA   NA  NA    9    9  NA
## 13  BS BHS  NA   NA  NA    8    9  NA
## 14  AG ATG  NA   NA  NA   13    7  NA
## 15          NA   NA  NA    3    3  NA
## 16    DM    NA   NA  NA    3    3  NA
## 17  SX SXM  NA   NA  NA    3    3  NA
## 18     DM   NA   NA  NA    2    2  NA
## 19  VG VBG  NA   NA  NA    2    2  NA
## 20 BH  BHS  NA   NA  NA    1    1  NA
## 21  BH BHS  NA   NA  NA    1    1  NA
## 22     TC   NA   NA  NA    1    1  NA

Drug use by age (from Michael D)

x <- getURL("https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv")
y <- read.csv(text = x)
use <-
y %>%
  gather(cat,val,ends_with(".use"),-age, -n) %>%
  select(age,n,cat,val)
freq <-
y %>%
  gather(cat,val,ends_with("frequency"),-age, -n) %>%
  select(age,n,cat,val)
## Warning: attributes are not identical across measure variables; they will
## be dropped
#see the most commonly used substances
use %>%
  group_by(age) %>%
  filter(val==max(val)) %>%
  arrange(age)
## Source: local data frame [17 x 4]
## Groups: age
## 
##      age    n         cat  val
## 1     12 2798 alcohol.use  3.9
## 2     13 2757 alcohol.use  8.5
## 3     14 2792 alcohol.use 18.1
## 4     15 2956 alcohol.use 29.2
## 5     16 3058 alcohol.use 40.1
## 6     17 3038 alcohol.use 49.3
## 7     18 2469 alcohol.use 58.7
## 8     19 2223 alcohol.use 64.6
## 9     20 2271 alcohol.use 69.7
## 10    21 2354 alcohol.use 83.2
## 11 22-23 4707 alcohol.use 84.2
## 12 24-25 4591 alcohol.use 83.1
## 13 26-29 2628 alcohol.use 80.7
## 14 30-34 2864 alcohol.use 77.5
## 15 35-49 7391 alcohol.use 75.0
## 16 50-64 3923 alcohol.use 67.2
## 17   65+ 2448 alcohol.use 49.3
#see the most frequently used substances
freq %>%
  group_by(age) %>%
  filter(val==max(val)) %>%
  arrange(age)
## Source: local data frame [20 x 4]
## Groups: age
## 
##      age    n                     cat  val
## 1     12 2798  hallucinogen.frequency   52
## 2     12 2798  tranquilizer.frequency   52
## 3     13 2757       alcohol.frequency    6
## 4     13 2757  hallucinogen.frequency    6
## 5     14 2792       cocaine.frequency  5.5
## 6     15 2956         crack.frequency  9.5
## 7     16 3058     stimulant.frequency  9.5
## 8     17 3038 pain.releiver.frequency    9
## 9     17 3038     stimulant.frequency    9
## 10    18 2469     stimulant.frequency    8
## 11    19 2223     oxycontin.frequency  7.5
## 12    20 2271       cocaine.frequency  8.0
## 13    21 2354      sedative.frequency    9
## 14 22-23 4707        heroin.frequency 57.5
## 15 24-25 4591        heroin.frequency 88.0
## 16 26-29 2628     stimulant.frequency    7
## 17 30-34 2864       cocaine.frequency  8.0
## 18 35-49 7391  tranquilizer.frequency    6
## 19 50-64 3923         crack.frequency 62.0
## 20   65+ 2448       alcohol.frequency   52
#performing the analysis from the discussion question (using the age as columns)
dcast(use, cat ~ age)
## Using val as value column: use value.var to override.
##                  cat  12  13   14   15   16   17   18   19   20   21 22-23
## 1        alcohol.use 3.9 8.5 18.1 29.2 40.1 49.3 58.7 64.6 69.7 83.2  84.2
## 2      marijuana.use 1.1 3.4  8.7 14.5 22.5 28.0 33.7 33.4 34.0 33.0  28.4
## 3        cocaine.use 0.1 0.1  0.1  0.5  1.0  2.0  3.2  4.1  4.9  4.8   4.5
## 4          crack.use 0.0 0.0  0.0  0.1  0.0  0.1  0.4  0.5  0.6  0.5   0.5
## 5         heroin.use 0.1 0.0  0.1  0.2  0.1  0.1  0.4  0.5  0.9  0.6   1.1
## 6   hallucinogen.use 0.2 0.6  1.6  2.1  3.4  4.8  7.0  8.6  7.4  6.3   5.2
## 7       inhalant.use 1.6 2.5  2.6  2.5  3.0  2.0  1.8  1.4  1.5  1.4   1.0
## 8  pain.releiver.use 2.0 2.4  3.9  5.5  6.2  8.5  9.2  9.4 10.0  9.0  10.0
## 9      oxycontin.use 0.1 0.1  0.4  0.8  1.1  1.4  1.7  1.5  1.7  1.3   1.7
## 10  tranquilizer.use 0.2 0.3  0.9  2.0  2.4  3.5  4.9  4.2  5.4  3.9   4.4
## 11     stimulant.use 0.2 0.3  0.8  1.5  1.8  2.8  3.0  3.3  4.0  4.1   3.6
## 12          meth.use 0.0 0.1  0.1  0.3  0.3  0.6  0.5  0.4  0.9  0.6   0.6
## 13      sedative.use 0.2 0.1  0.2  0.4  0.2  0.5  0.4  0.3  0.5  0.3   0.2
##    24-25 26-29 30-34 35-49 50-64  65+
## 1   83.1  80.7  77.5  75.0  67.2 49.3
## 2   24.9  20.8  16.4  10.4   7.3  1.2
## 3    4.0   3.2   2.1   1.5   0.9  0.0
## 4    0.5   0.4   0.5   0.5   0.4  0.0
## 5    0.7   0.6   0.4   0.1   0.1  0.0
## 6    4.5   3.2   1.8   0.6   0.3  0.1
## 7    0.8   0.6   0.4   0.3   0.2  0.0
## 8    9.0   8.3   5.9   4.2   2.5  0.6
## 9    1.3   1.2   0.9   0.3   0.4  0.0
## 10   4.3   4.2   3.6   1.9   1.4  0.2
## 11   2.6   2.3   1.4   0.6   0.3  0.0
## 12   0.7   0.6   0.4   0.2   0.2  0.0
## 13   0.2   0.4   0.4   0.3   0.2  0.0

College Scorecard (from Joshua S.)

x2 <- getURL("https://ed-public-download.app.cloud.gov/downloads/Most-Recent-Cohorts-Scorecard-Elements.csv")
y2 <- read.csv(text = x2,na.strings=c("NULL"))

#combining similar columns; the columns beginning with UGDS refer to ethnicity breakdowns
df <- y2 %>%
  gather(cat,val,starts_with("UGDS"), -INSTNM, -STABBR) %>%
  select(INSTNM,STABBR,cat,val) %>%
  #removing the "total undergrads" fields
  filter(cat != "UGDS") %>%
  separate(cat,into=c("student.type","ethnicity"),sep="_")

#analysis of ethnicity by state, showing institutions with the highest concentrations of each category
df %>%
  group_by(ethnicity,STABBR) %>%
  filter(val == max(val)) %>%
  arrange(STABBR)
## Source: local data frame [81 x 5]
## Groups: ethnicity, STABBR
## 
##                                                     INSTNM STABBR
## 1                                     Alaska Bible College     AK
## 2                                    Alaska Career College     AK
## 3                                    Alaska Career College     AK
## 4                                    Alaska Career College     AK
## 5                                 Alaska Christian College     AK
## 6                                        Ilisagvik College     AK
## 7                           University of Alaska Anchorage     AK
## 8                                        Ilisagvik College     AK
## 9                           University of Alaska Fairbanks     AK
## 10                        American Samoa Community College     AS
## 11                        American Samoa Community College     AS
## 12                        American Samoa Community College     AS
## 13                        American Samoa Community College     AS
## 14                        American Samoa Community College     AS
## 15                        American Samoa Community College     AS
## 16                        American Samoa Community College     AS
## 17                        American Samoa Community College     AS
## 18                        American Samoa Community College     AS
## 19                               College of Micronesia-FSM     FM
## 20                               College of Micronesia-FSM     FM
## 21                               College of Micronesia-FSM     FM
## 22                               College of Micronesia-FSM     FM
## 23                               College of Micronesia-FSM     FM
## 24                               College of Micronesia-FSM     FM
## 25                               College of Micronesia-FSM     FM
## 26                               College of Micronesia-FSM     FM
## 27                               College of Micronesia-FSM     FM
## 28                              Pacific Islands University     GU
## 29                              Pacific Islands University     GU
## 30                                      University of Guam     GU
## 31                                      University of Guam     GU
## 32                                      University of Guam     GU
## 33                              Pacific Islands University     GU
## 34                                  Guam Community College     GU
## 35                                      University of Guam     GU
## 36                                      University of Guam     GU
## 37                Mr Leon's School of Hair Design-Lewiston     ID
## 38                                    Boise Barber College     ID
## 39                                   Milan Institute-Nampa     ID
## 40                                Carrington College-Boise     ID
## 41                       Headmasters School of Hair Design     ID
## 42                  Mr Leon's School of Hair Design-Moscow     ID
## 43                                    Boise Barber College     ID
## 44                                  Idaho State University     ID
## 45                             University of Phoenix-Idaho     ID
## 46                         College of the Marshall Islands     MH
## 47                         College of the Marshall Islands     MH
## 48                         College of the Marshall Islands     MH
## 49                         College of the Marshall Islands     MH
## 50                         College of the Marshall Islands     MH
## 51                         College of the Marshall Islands     MH
## 52                         College of the Marshall Islands     MH
## 53                         College of the Marshall Islands     MH
## 54                         College of the Marshall Islands     MH
## 55                               Northern Marianas College     MP
## 56                               Northern Marianas College     MP
## 57                               Northern Marianas College     MP
## 58                               Northern Marianas College     MP
## 59                               Northern Marianas College     MP
## 60                               Northern Marianas College     MP
## 61                               Northern Marianas College     MP
## 62                               Northern Marianas College     MP
## 63                               Northern Marianas College     MP
## 64                                 Palau Community College     PW
## 65                                 Palau Community College     PW
## 66                                 Palau Community College     PW
## 67                                 Palau Community College     PW
## 68                                 Palau Community College     PW
## 69                                 Palau Community College     PW
## 70                                 Palau Community College     PW
## 71                                 Palau Community College     PW
## 72                                 Palau Community College     PW
## 73                                          Casper College     WY
## 74                                 CollegeAmerica-Cheyenne     WY
## 75 Cheeks International Academy of Beauty Culture-Cheyenne     WY
## 76 Cheeks International Academy of Beauty Culture-Cheyenne     WY
## 77                                 Central Wyoming College     WY
## 78                                 CollegeAmerica-Cheyenne     WY
## 79                                         Wyotech-Laramie     WY
## 80                       Western Wyoming Community College     WY
## 81                                         Wyotech-Laramie     WY
## Variables not shown: student.type (chr), ethnicity (chr), val (dbl)