Which countries host the most solar measurement stations, and what are the most common equipment types used globally?
This report uses simple data wrangling and visualization to surface two quick insights from the Global Solar Stations Inventory (Sept 2023).
library(tidyverse)
library(readr)
library(janitor)
library(knitr)
# NOTE: The inventory CSV is Latin-1 encoded. Reading it with a matching
# locale keeps non-ASCII characters from coming through garbled.
data_path <- "global-solar-stations-inventory-september-2023-9.14.23-update.csv"
solar_raw <- data_path |>
  readr::read_csv(locale = readr::locale(encoding = "Latin1"))
# Normalize the column headers to snake_case so they are easy to reference.
solar <- janitor::clean_names(solar_raw)

# Structural sanity check: print column names, types, and leading values.
dplyr::glimpse(solar)
## Rows: 97
## Columns: 20
## $ country <chr> "Armenia", "Armenia", "Armenia", "Armeni…
## $ nearest_settlement <chr> "Hrazdan", "Masrik", "Talin", "Yerevan A…
## $ site_name <chr> "ARM_Solar_Hrazdan", "ARM_Solar_Masrik",…
## $ elevation <dbl> 1845, 1944, 1641, 946, 5, 184, 387, 285,…
## $ time_zone <chr> "UTC+03:00", "UTC+03:00", "UTC+03:00", "…
## $ equipment_type <chr> NA, NA, NA, NA, "Helioscale omega statio…
## $ equipment_owner <chr> NA, NA, NA, NA, "Suntrace GmbH", "CSP Se…
## $ partners <chr> NA, NA, NA, NA, "Suntrace GmbH", "CSP Se…
## $ host_institution <chr> NA, NA, NA, NA, "Char Darbesh Adarsha Gr…
## $ project_founder <chr> "World Bank", "World Bank", "World Bank"…
## $ commission_date <chr> "20/05/2016", "20/05/2016", "20/05/2016"…
## $ start_of_measurement_campaign <chr> "20/05/2016", "20/05/2016", "20/05/2016"…
## $ measurement_data <chr> "Hrazdan station (https://energydata.in…
## $ documents_reports <chr> NA, NA, NA, NA, "https://esmap.org/re-ma…
## $ end_of_measurement_campaign <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ decommission_date <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ project_owner <chr> NA, NA, NA, NA, "World Bank", "World Ban…
## $ photo_gallery <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ latitude <dbl> 40.511600, 40.207700, 40.386000, 40.1887…
## $ longitude <dbl> 44.82300, 45.76450, 43.89720, 44.39760, …
# Summary statistics for the identifying, location, and equipment columns.
solar |>
  select(
    country, nearest_settlement, site_name, elevation,
    time_zone, equipment_type, latitude, longitude
  ) |>
  summary()
## country nearest_settlement site_name elevation
## Length:97 Length:97 Length:97 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 57.0
## Mode :character Mode :character Mode :character Median : 305.0
## Mean : 521.4
## 3rd Qu.: 946.0
## Max. :2363.0
## time_zone equipment_type latitude longitude
## Length:97 Length:97 Min. :-17.7097 Min. :-16.413
## Class :character Class :character 1st Qu.: -0.5434 1st Qu.: -1.073
## Mode :character Mode :character Median : 11.1024 Median : 33.515
## Mean : 9.7287 Mean : 41.303
## 3rd Qu.: 14.7725 3rd Qu.: 72.984
## Max. : 40.5116 Max. :179.196
We apply a few light transformations to make downstream operations straightforward.
# Convert the categorical text columns to factors for counting and plotting.
# Missing equipment types become an explicit "Unknown" level so those stations
# are still counted instead of surfacing as NA.
solar_tidy <- solar |>
  mutate(
    country = as.factor(country),
    # fct_na_value_to_level() supersedes fct_explicit_na(), which is deprecated
    # as of forcats 1.0.0 and emits a deprecation warning.
    equipment_type = fct_na_value_to_level(
      as.factor(equipment_type),
      level = "Unknown"
    ),
    time_zone = as.factor(time_zone)
  )
# Completeness check: total row count plus the number of NA values remaining
# in the two grouping columns. equipment_type is expected to report 0 because
# its missing values were converted to an explicit factor level.
solar_tidy |>
  summarise(
    n_rows = n(),
    across(
      c(country, equipment_type),
      \(col) sum(is.na(col)),
      .names = "missing_{.col}"
    )
  )
## # A tibble: 1 × 3
## n_rows missing_country missing_equipment_type
## <int> <int> <int>
## 1 97 0 0
We count stations by country and highlight the top 10.
# Number of stations per country, largest first.
country_counts <- count(solar_tidy, country, name = "n_stations", sort = TRUE)

# Render the ten leading countries as a small table.
country_counts |>
  slice_head(n = 10) |>
  knitr::kable(caption = "Top 10 Countries by Number of Solar Stations")
country | n_stations |
---|---|
Pakistan | 9 |
Zambia | 9 |
Mali | 5 |
Nepal | 5 |
Senegal | 5 |
Tanzania | 5 |
Vietnam | 5 |
Armenia | 4 |
Burkina Faso | 4 |
Maldives | 4 |
# Keep the ten leading countries and order the factor levels by station count
# so the bars are drawn in ranked order.
top_countries <- slice_head(country_counts, n = 10)
top_countries <- mutate(
  top_countries,
  country = fct_reorder(country, n_stations)
)

# Horizontal bar chart of station counts for those countries.
ggplot(data = top_countries, mapping = aes(x = country, y = n_stations)) +
  geom_col() +
  coord_flip() +
  theme_minimal(base_size = 12) +
  labs(
    title = "Top 10 Countries by Number of Solar Measurement Stations",
    x = "Country",
    y = "Number of Stations",
    caption = "Source: Global Solar Stations Inventory (Sept 2023)"
  )
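We summarize the most frequently listed equipment types across all stations. Labels are counted exactly as recorded in the inventory, so spelling variants such as "Tier1" and "Tier 1" appear as separate rows.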
# Number of stations per equipment type, largest first.
# NOTE(review): labels are counted verbatim, so spelling variants such as
# "Tier1" and "Tier 1" form separate groups; consider normalizing
# equipment_type upstream if they are meant to be the same category.
equipment_counts <- count(
  solar_tidy,
  equipment_type,
  name = "n_stations",
  sort = TRUE
)

# Render the ten most frequent equipment types as a small table.
equipment_counts |>
  slice_head(n = 10) |>
  knitr::kable(caption = "Most Common Equipment Types (Top 10)")
equipment_type | n_stations |
---|---|
Tier2 | 48 |
Tier1 | 30 |
Tier 1 | 5 |
Unknown | 4 |
Helioscale omega station (Tier 1) | 3 |
Tier 2 station with Rotating Shadowband Radiometer, Silicon (LI-COR) and Thermopile Pyranometer | 3 |
Helioscale omega station (Tier 2) | 1 |
Helioscale phi station (Tier 1) | 1 |
Helioscale phi station (Tier 2) | 1 |
Tier1 | 1 |