Set flags to TRUE when you want to write out PNG plots and/or CSV data files

write.csv.enabled = F
write.plot.enabled = F

Load R libraries

library(ggplot2) 
## Warning: package 'ggplot2' was built under R version 3.2.4

Function used to write plot to PNG file

write.plot <- function(p, filename, width=1024, height=576, font.size=16) {
  png(filename,width, height)
  print(p + theme_bw(base_size=font.size))
  dev.off()
}

Function to pretty print

printf <- function(...) {
    sprintf(...)
}

Load Open Cluster data: Dias, W.S. et al., 2002. New catalogue of optically visible open clusters and candidates. Astronomy and Astrophysics, 389(3), pp.871–873.

raw <- read.csv("DAML02 V3.4.csv")
clusters <- raw[,2:ncol(raw)]
head(clusters)
##           name   RA2000    DE2000 diameter distance  age  nc
## 1  Berkeley 58 00:00:12 +60:58:00       11     2700 8.47  88
## 2     NGC 7801 00:00:21 +50:44:30        8     1275 9.23  85
## 3     FSR 0459 00:00:39 +59:14:20        2     3800 8.95   8
## 4     Stock 18 00:01:37 +64:37:30        6     1242 8.10 109
## 5  Berkeley 59 00:02:14 +67:25:00       10     1000 6.10   2
## 6 Berkeley 104 00:03:30 +63:35:00        3     4365 8.89  76
nrow(clusters)
## [1] 2167

Exract data for M67

m67 <- subset(clusters, name=="NGC 2682")
print(m67)
##         name   RA2000    DE2000 diameter distance  age  nc
## 953 NGC 2682 08:51:18 +11:48:00       25      808 9.45 399

Distribution of Open Cluster Distance

xmin <- 0
xmax <- 5000
dx <- 100

h <- hist(subset(clusters, distance>=xmin & distance<=xmax)$distance, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(distance=h$mids, counts=h$counts)

ymin <- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster distance and mark M67

p <- ggplot(data, aes(distance, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m67$distance) +
  geom_text(aes(x=m67$distance, y=ymax+5), hjust=0, label=" M67") +
  labs(title="Open Cluster Distance", x="Distance [pc]", y="Count") +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Distance + M67.png")

Write data for Open Cluster distance to file

if (write.csv.enabled) write.csv(data, "Open Cluster Distance.csv")

Number of Open Clusters with valid distance values

nclusters <- nrow(subset(clusters, !is.na(distance)))
print(nclusters)
## [1] 2038

Number of Open Clusters closer/farther than M67

nclusters.closer <- nrow(subset(clusters, distance<m67$distance))
nclusters.farther <- nrow(subset(clusters, distance>m67$distance))
printf("%d/%d", nclusters.closer, nclusters.farther)
## [1] "288/1748"

Percent of Open Clusters closer/farther than M67

pclusters.closer <- 100 * nclusters.closer / nclusters
pcluser.farther <- 100 * nclusters.farther / nclusters
printf("%.0f/%.0f", pclusters.closer, pcluser.farther)
## [1] "14/86"

Summary statistics for Open Cluster distance

summary(clusters$distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      25    1125    1786    2265    2800   14870     129

Distribution of Open Cluster Diameter

xmin <- 0
xmax <- 50
dx <- 1

h <- hist(subset(clusters, diameter>=xmin & diameter<=xmax)$diameter, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(diameter=h$mids, counts=h$counts)

ymin<- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster diameter and mark M67

p <- ggplot(data, aes(diameter, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m67$diameter) +
  geom_text(aes(x=m67$diameter, y=ymax), hjust=0, label=" M67") +
  labs(title="Open Cluster Diameter", x="Diameter [arcmin]", y="Count") +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Diameter + M67.png")

Write data for Open Cluster diameter to file

if (write.csv.enabled) write.csv(data, "Open Cluster Diameter.csv")

Number of Open Clusters with valid value for diameter

nclusters <- nrow(subset(clusters, !is.na(diameter)))
print(nclusters)
## [1] 2161

Number of Open Clusters smaller/larger than M67

nclusters.smaller <- nrow(subset(clusters, diameter<m67$diameter))
nclusters.larger <- nrow(subset(clusters, diameter>m67$diameter))
printf("%.0f/%.0f", nclusters.smaller, nclusters.larger)
## [1] "1891/243"

Percent of Open Clusters smaller/larger than M67

pclusters.smaller <- 100 * nclusters.smaller / nclusters
pclusters.larger <- 100 * nclusters.larger / nclusters
printf("%.0f/%.0f", pclusters.smaller, pclusters.larger)
## [1] "88/11"

Summary statistics for Open Cluster diameter

summary(clusters$diameter)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.20    3.00    5.00   14.46   12.00 1400.00       6

Distribution of Open Cluster Number of Stars

xmin <- 0
xmax <- 500
dx <- 10

h <- hist(subset(clusters, nc>=xmin & nc<=xmax)$nc, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(nstars=h$mids, counts=h$counts)

ymin<- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster number of stars and mark M67

p <- ggplot(data, aes(nstars, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m67$nc) +
  geom_text(aes(x=m67$nc, y=ymax), hjust=0, label=" M67") +
  labs(title="Open Cluster Number of Stars", x="Number of Stars", y="Count") +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Number of Stars + M67.png")

Write data for Open Cluster number of stars to file

if (write.csv.enabled) write.csv(data, "Open Cluster Number of Stars.csv")

Number of Open Clusters with valid value for number of stars

nclusters <- nrow(subset(clusters, !is.na(nc)))
print(nclusters)
## [1] 2102

Number of Open Clusters with less/more stars than M67

nclusters.less <- nrow(subset(clusters, nc<m67$nc))
nclusters.more <- nrow(subset(clusters, nc>m67$nc))
printf("%.0f/%.0f", nclusters.less, nclusters.more)
## [1] "1779/322"

Percent of Open Clusters with less/more stars than M67

pclusters.less <- 100 * nclusters.less / nclusters
pclusters.more <- 100 * nclusters.more / nclusters
printf("%.0f/%.0f", pclusters.less, pclusters.more)
## [1] "85/15"

Summary statistics for Open Cluster number of stars

summary(clusters$nc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     1.0    24.0    74.0   295.2   208.8 14340.0      65

Distribution of Open Cluster Age

xmin <- 6
xmax <- 10
dx <- 0.1

h <- hist(subset(clusters, age>=xmin & age<=xmax)$age, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(age=h$mids, counts=h$counts)

ymin <- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster age and mark M67

p <- ggplot(data, aes(age, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m67$age) +
  geom_text(aes(x=m67$age, y=ymax), hjust=0, label=" M67") +
  labs(title="Open Cluster Age", x="Age [log yr]", y="Count") +
  xlim(xmin, xmax) +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Age + M67.png")

Write data for Open Cluster age to file

if (write.csv.enabled) write.csv(data, "Open Cluster Age.csv")

Number of Open Clusters with valid value for age

nclusters <- nrow(subset(clusters, !is.na(age)))

Number of Open Clusters younger/older than M67

nclusters.younger <- nrow(subset(clusters, age<m67$age))
nclusters.older <- nrow(subset(clusters, age>m67$age))
printf("%.0f/%.0f", nclusters.younger, nclusters.older)
## [1] "1921/83"

Percent of Open Clusters younger/older than M67

pclusters.younger <- 100 * nclusters.younger / nclusters
pclusters.older <- 100 * nclusters.older / nclusters
printf("%.0f/%.0f", pclusters.younger, pclusters.older)
## [1] "96/4"

Summary statistics for Open Cluster age

summary(clusters$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   6.000   7.823   8.500   8.320   8.930  10.100     157

References

[1] Dias, W.S. et al., 2002. New catalogue of optically visible open clusters and candidates. Astronomy and Astrophysics, 389(3), pp.871–873.