Set flags to TRUE when you want to write out PNG plots and/or CSV data files

write.csv.enabled = F
write.plot.enabled = F

Load R libraries

library(ggplot2) 
## Warning: package 'ggplot2' was built under R version 3.2.4

Function used to write plot to PNG file

write.plot <- function(p, filename, width=1024, height=576, font.size=16) {
  png(filename,width, height)
  print(p + theme_bw(base_size=font.size))
  dev.off()
}

Function to pretty print

printf <- function(...) {
    sprintf(...)
}

Load Open Cluster data: Dias, W.S. et al., 2002. New catalogue of optically visible open clusters and candidates. Astronomy and Astrophysics, 389(3), pp.871–873.

raw <- read.csv("DAML02 V3.4.csv")
clusters <- raw[,2:ncol(raw)]
head(clusters)
##           name   RA2000    DE2000 diameter distance  age  nc
## 1  Berkeley 58 00:00:12 +60:58:00       11     2700 8.47  88
## 2     NGC 7801 00:00:21 +50:44:30        8     1275 9.23  85
## 3     FSR 0459 00:00:39 +59:14:20        2     3800 8.95   8
## 4     Stock 18 00:01:37 +64:37:30        6     1242 8.10 109
## 5  Berkeley 59 00:02:14 +67:25:00       10     1000 6.10   2
## 6 Berkeley 104 00:03:30 +63:35:00        3     4365 8.89  76
nrow(clusters)
## [1] 2167

Exract data for M35

m35 <- subset(clusters, name=="NGC 2168")
print(m35)
##         name   RA2000    DE2000 diameter distance  age   nc
## 421 NGC 2168 06:08:54 +24:20:00       40      912 8.25 2474

Distribution of Open Cluster Distance

xmin <- 0
xmax <- 5000
dx <- 100

h <- hist(subset(clusters, distance>=xmin & distance<=xmax)$distance, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(distance=h$mids, counts=h$counts)

ymin <- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster distance and mark M35

p <- ggplot(data, aes(distance, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m35$distance) +
  geom_text(aes(x=m35$distance, y=ymax+5), hjust=0, label=" M35") +
  labs(title="Open Cluster Distance", x="Distance [pc]", y="Count") +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Distance + M35.png")

Write data for Open Cluster distance to file

if (write.csv.enabled) write.csv(data, "Open Cluster Distance.csv")

Number of Open Clusters with valid distance values

nclusters <- nrow(subset(clusters, !is.na(distance)))
print(nclusters)
## [1] 2038

Number of Open Clusters closer/farther than M35

nclusters.closer <- nrow(subset(clusters, distance<m35$distance))
nclusters.farther <- nrow(subset(clusters, distance>m35$distance))
printf("%d/%d", nclusters.closer, nclusters.farther)
## [1] "360/1675"

Percent of Open Clusters closer/farther than M35

pclusters.closer <- 100 * nclusters.closer / nclusters
pcluser.farther <- 100 * nclusters.farther / nclusters
printf("%.0f/%.0f", pclusters.closer, pcluser.farther)
## [1] "18/82"

Summary statistics for Open Cluster distance

summary(clusters$distance)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      25    1125    1786    2265    2800   14870     129

Distribution of Open Cluster Diameter

xmin <- 0
xmax <- 50
dx <- 1

h <- hist(subset(clusters, diameter>=xmin & diameter<=xmax)$diameter, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(diameter=h$mids, counts=h$counts)

ymin<- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster diameter and mark M35

p <- ggplot(data, aes(diameter, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m35$diameter) +
  geom_text(aes(x=m35$diameter, y=ymax), hjust=0, label=" M35") +
  labs(title="Open Cluster Diameter", x="Diameter [arcmin]", y="Count") +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Diameter + M35.png")

Write data for Open Cluster diameter to file

if (write.csv.enabled) write.csv(data, "Open Cluster Diameter.csv")

Number of Open Clusters with valid value for diameter

nclusters <- nrow(subset(clusters, !is.na(diameter)))
print(nclusters)
## [1] 2161

Number of Open Clusters smaller/larger than M35

nclusters.smaller <- nrow(subset(clusters, diameter<m35$diameter))
nclusters.larger <- nrow(subset(clusters, diameter>m35$diameter))
printf("%.0f/%.0f", nclusters.smaller, nclusters.larger)
## [1] "2033/120"

Percent of Open Clusters smaller/larger than M35

pclusters.smaller <- 100 * nclusters.smaller / nclusters
pclusters.larger <- 100 * nclusters.larger / nclusters
printf("%.0f/%.0f", pclusters.smaller, pclusters.larger)
## [1] "94/6"

Summary statistics for Open Cluster diameter

summary(clusters$diameter)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.20    3.00    5.00   14.46   12.00 1400.00       6

Distribution of Open Cluster Number of Stars

xmin <- 0
xmax <- 300
dx <- 10

h <- hist(subset(clusters, nc>=xmin & nc<=xmax)$nc, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(nstars=h$mids, counts=h$counts)

ymin<- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster number of stars and mark M35

p <- ggplot(data, aes(nstars, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m35$nc) +
  geom_text(aes(x=m35$nc, y=ymax), hjust=0, label=" M35") +
  labs(title="Open Cluster Number of Stars", x="Number of Stars", y="Count") +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Number of Stars + M35.png")

Write data for Open Cluster number of stars to file

if (write.csv.enabled) write.csv(data, "Open Cluster Number of Stars.csv")

Number of Open Clusters with valid value for number of stars

nclusters <- nrow(subset(clusters, !is.na(nc)))
print(nclusters)
## [1] 2102

Number of Open Clusters with less/more stars than M35

nclusters.less <- nrow(subset(clusters, nc<m35$nc))
nclusters.more <- nrow(subset(clusters, nc>m35$nc))
printf("%.0f/%.0f", nclusters.less, nclusters.more)
## [1] "2051/50"

Percent of Open Clusters with less/more stars than M35

pclusters.less <- 100 * nclusters.less / nclusters
pclusters.more <- 100 * nclusters.more / nclusters
printf("%.0f/%.0f", pclusters.less, pclusters.more)
## [1] "98/2"

Summary statistics for Open Cluster number of stars

summary(clusters$nc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     1.0    24.0    74.0   295.2   208.8 14340.0      65

Distribution of Open Cluster Age

xmin <- 6
xmax <- 10
dx <- 0.1

h <- hist(subset(clusters, age>=xmin & age<=xmax)$age, breaks=seq(xmin,xmax,dx), plot=F)
data <- data.frame(age=h$mids, counts=h$counts)

ymin <- min(data$counts, na.rm=T)
ymax <- max(data$counts, na.rm=T)

Plot distribution of Open Cluster age and mark M35

p <- ggplot(data, aes(age, counts)) +
  geom_bar(stat="identity", width=dx) +
  geom_vline(xintercept=m35$age) +
  geom_text(aes(x=m35$age, y=ymax), hjust=0, label=" M35") +
  labs(title="Open Cluster Age", x="Age [log yr]", y="Count") +
  xlim(xmin, xmax) +
  theme_bw() +
  coord_cartesian(xlim=c(xmin,xmax))
print(p)

if (write.plot.enabled) write.plot(p, "Open Cluster Age + M35.png")

Write data for Open Cluster age to file

if (write.csv.enabled) write.csv(data, "Open Cluster Age.csv")

Number of Open Clusters with valid value for age

nclusters <- nrow(subset(clusters, !is.na(age)))

Number of Open Clusters younger/older than M35

nclusters.younger <- nrow(subset(clusters, age<m35$age))
nclusters.older <- nrow(subset(clusters, age>m35$age))
printf("%.0f/%.0f", nclusters.younger, nclusters.older)
## [1] "759/1239"

Percent of Open Clusters younger/older than M35

pclusters.younger <- 100 * nclusters.younger / nclusters
pclusters.older <- 100 * nclusters.older / nclusters
printf("%.0f/%.0f", pclusters.younger, pclusters.older)
## [1] "38/62"

Summary statistics for Open Cluster age

summary(clusters$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   6.000   7.823   8.500   8.320   8.930  10.100     157

References

[1] Dias, W.S. et al., 2002. New catalogue of optically visible open clusters and candidates. Astronomy and Astrophysics, 389(3), pp.871–873.