# Fix the RNG state so the five resamples below are reproducible; the seed
# value itself was chosen at random and has no other significance.
set.seed(327401)

# Draw `size` rows from `data` uniformly at random, with replacement.
draw_sample <- function(data, size = 8000) {
  data[sample.int(nrow(data), size = size, replace = TRUE), ]
}

# Five independent resamples of the quakes data, drawn in this fixed order.
s1 <- draw_sample(quakes)
s2 <- draw_sample(quakes)
s3 <- draw_sample(quakes)
s4 <- draw_sample(quakes)
s5 <- draw_sample(quakes)
# Sample 1: counts of each event type by magnitude type.
table(s1[c("type", "magType")])
##                    magType
## type                  mb   ml   ms ms_20   mw  mwb  mwc  mwp  mwr  mww
##   earthquake        3739   19    1     1   45  293  129    5   93 3653
##   volcanic eruption    0    0    0     0   22    0    0    0    0    0
# Histogram of magnitudes, filled by event type. The number of bins for all
# samples is 10 * (max - min) + 1; it is computed from the sample itself so it
# cannot drift out of sync with the data. round() absorbs floating-point error
# in the range (e.g. 10 * (8.2 - 5) is not exactly 32).
s1 %>%
  ggplot(aes(x = mag, fill = type)) +
  geom_histogram(bins = round(10 * diff(range(s1[["mag"]]))) + 1)

# Quartiles and mean of the sampled magnitudes.
summary(s1[["mag"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.000   5.100   5.200   5.351   5.500   8.200
# Sample 2: counts of each event type by magnitude type.
table(s2[c("type", "magType")])
##                    magType
## type                  mb   ml   ms   mw  mwb  mwc  mwp  mwr  mww
##   earthquake        3748   27    2   46  274  115    1  113 3650
##   nuclear explosion    3    0    0    0    0    0    0    0    0
##   volcanic eruption    0    0    0   21    0    0    0    0    0
# Histogram of magnitudes, filled by event type. The bin count follows the
# rule 10 * (max - min) + 1 and is computed from the sample rather than
# hard-coded; round() absorbs floating-point error in the range.
s2 %>%
  ggplot(aes(x = mag, fill = type)) +
  geom_histogram(bins = round(10 * diff(range(s2[["mag"]]))) + 1)

# Quartiles and mean of the sampled magnitudes.
summary(s2[["mag"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.000   5.100   5.200   5.334   5.500   8.600
# Sample 3: counts of each event type by magnitude type.
table(s3[c("type", "magType")])
##                    magType
## type                  mb   Md   ml ml(texnet)   ms ms_20   mw  mwb  mwc  mwp
##   earthquake        3728    1   32          2    1     1   54  266  121    4
##   nuclear explosion    2    0    0          0    0     0    0    0    0    0
##   volcanic eruption    0    0    0          0    0     0   20    0    0    0
##                    magType
## type                 mwr  mww
##   earthquake          91 3677
##   nuclear explosion    0    0
##   volcanic eruption    0    0
# Histogram of magnitudes, filled by event type. The bin count follows the
# rule 10 * (max - min) + 1 and is computed from the sample rather than
# hard-coded; round() absorbs floating-point error in the range.
s3 %>%
  ggplot(aes(x = mag, fill = type)) +
  geom_histogram(bins = round(10 * diff(range(s3[["mag"]]))) + 1)

# Quartiles and mean of the sampled magnitudes.
summary(s3[["mag"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.000   5.100   5.200   5.352   5.500   8.300
# Sample 4: counts of each event type by magnitude type.
table(s4[c("type", "magType")])
##                    magType
## type                  mb   ml   Ml   ms ms_20   mw  mwb  mwc  mwp  mwr  mww
##   earthquake        3670   21    1    2     1   53  297  137    3   93 3702
##   nuclear explosion    2    0    0    0     0    0    0    0    0    0    0
##   volcanic eruption    0    0    0    0     1   17    0    0    0    0    0
# Histogram of magnitudes, filled by event type. The bin count follows the
# rule 10 * (max - min) + 1 and is computed from the sample rather than
# hard-coded; round() absorbs floating-point error in the range.
s4 %>%
  ggplot(aes(x = mag, fill = type)) +
  geom_histogram(bins = round(10 * diff(range(s4[["mag"]]))) + 1)

# Quartiles and mean of the sampled magnitudes.
summary(s4[["mag"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.000   5.100   5.200   5.344   5.500   8.200
# Sample 5: counts of each event type by magnitude type.
table(s5[c("type", "magType")])
##                    magType
## type                  mb   ml   Ml ml(texnet)   ms ms_20   mw  mwb  mwc  mwr
##   earthquake        3658   29    1          1    3     3   48  311  135   95
##   nuclear explosion    1    0    0          0    0     0    0    0    0    0
##   volcanic eruption    0    0    0          0    0     0   26    0    0    0
##                    magType
## type                 mww
##   earthquake        3689
##   nuclear explosion    0
##   volcanic eruption    0
# Histogram of magnitudes, filled by event type. The bin count follows the
# rule 10 * (max - min) + 1 and is computed from the sample rather than
# hard-coded; round() absorbs floating-point error in the range.
s5 %>%
  ggplot(aes(x = mag, fill = type)) +
  geom_histogram(bins = round(10 * diff(range(s5[["mag"]]))) + 1)

# Quartiles and mean of the sampled magnitudes.
summary(s5[["mag"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.000   5.100   5.200   5.345   5.500   8.300

For the most part, these subsets are all fairly similar. There are differences between any two in the counts of categorical variables (such as magnitude type and event type) and slight differences in the distribution of magnitudes (especially towards higher magnitudes), but the mean and median values for the magnitude are roughly the same in each sample. This is likely helped by the fact that a large majority of the data lies around these values.

Four out of the five samples included at least one nuclear explosion (with one even including three of the four), but one sample contained none. Working solely from that sample, the presence of nuclear events in the dataset as a whole would be unexpected (and indeed it was unexpected when I first looked through this dataset).

Sample 2 was the only sample containing an earthquake with magnitude greater than 8.4 - this is not entirely unexpected, as there is only one such earthquake in the dataset, out of 20000 total datapoints.

All of the samples have between 3650 and 3750 earthquakes with magnitude type MB and approximately the same number of earthquakes with magnitude type MWW as with magnitude type MB. Events other than earthquakes comprise less than 0.5% of every sample.

The results of this sampling serve as a reminder that the data I have is not the complete picture, as the dataset lacks data on earthquakes with magnitude less than 5, earthquakes before March 11, 2012, and any earthquakes that have happened since the start of 2024, and I must continue to account for that moving forwards.