Guiding Question

Which countries host the most solar measurement stations, and what are the most common equipment types used globally?

This report uses simple data wrangling and visualization to surface two quick insights from the Global Solar Stations Inventory (Sept 2023).

Data Import & Formatting

library(tidyverse)
library(readr)
library(janitor)
library(knitr)
# Location of the inventory CSV (a bare file name, so it is resolved against
# the current working directory).
data_path <- "global-solar-stations-inventory-september-2023-9.14.23-update.csv"

# NOTE: The CSV uses a Latin-1 encoding; decode it as such so that non-ASCII
# characters are not garbled on import.
solar_raw <- readr::read_csv(
  data_path,
  locale = readr::locale(encoding = "Latin1")
)

# Standardise the column names to snake_case for easier handling downstream.
solar <- janitor::clean_names(solar_raw)

# Quick structural check: one line per column with its type and first values.
solar |>
  glimpse()
## Rows: 97
## Columns: 20
## $ country                       <chr> "Armenia", "Armenia", "Armenia", "Armeni…
## $ nearest_settlement            <chr> "Hrazdan", "Masrik", "Talin", "Yerevan A…
## $ site_name                     <chr> "ARM_Solar_Hrazdan", "ARM_Solar_Masrik",…
## $ elevation                     <dbl> 1845, 1944, 1641, 946, 5, 184, 387, 285,…
## $ time_zone                     <chr> "UTC+03:00", "UTC+03:00", "UTC+03:00", "…
## $ equipment_type                <chr> NA, NA, NA, NA, "Helioscale omega statio…
## $ equipment_owner               <chr> NA, NA, NA, NA, "Suntrace GmbH", "CSP Se…
## $ partners                      <chr> NA, NA, NA, NA, "Suntrace GmbH", "CSP Se…
## $ host_institution              <chr> NA, NA, NA, NA, "Char Darbesh Adarsha Gr…
## $ project_founder               <chr> "World Bank", "World Bank", "World Bank"…
## $ commission_date               <chr> "20/05/2016", "20/05/2016", "20/05/2016"…
## $ start_of_measurement_campaign <chr> "20/05/2016", "20/05/2016", "20/05/2016"…
## $ measurement_data              <chr> "Hrazdan station  (https://energydata.in…
## $ documents_reports             <chr> NA, NA, NA, NA, "https://esmap.org/re-ma…
## $ end_of_measurement_campaign   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ decommission_date             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ project_owner                 <chr> NA, NA, NA, NA, "World Bank", "World Ban…
## $ photo_gallery                 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ latitude                      <dbl> 40.511600, 40.207700, 40.386000, 40.1887…
## $ longitude                     <dbl> 44.82300, 45.76450, 43.89720, 44.39760, …
# Summary statistics for the identifying, location and equipment columns only;
# the remaining columns are left out of this overview.
solar |>
  select(
    country, nearest_settlement, site_name, elevation,
    time_zone, equipment_type, latitude, longitude
  ) |>
  summary()
##    country          nearest_settlement  site_name           elevation     
##  Length:97          Length:97          Length:97          Min.   :   0.0  
##  Class :character   Class :character   Class :character   1st Qu.:  57.0  
##  Mode  :character   Mode  :character   Mode  :character   Median : 305.0  
##                                                           Mean   : 521.4  
##                                                           3rd Qu.: 946.0  
##                                                           Max.   :2363.0  
##   time_zone         equipment_type        latitude          longitude      
##  Length:97          Length:97          Min.   :-17.7097   Min.   :-16.413  
##  Class :character   Class :character   1st Qu.: -0.5434   1st Qu.: -1.073  
##  Mode  :character   Mode  :character   Median : 11.1024   Median : 33.515  
##                                        Mean   :  9.7287   Mean   : 41.303  
##                                        3rd Qu.: 14.7725   3rd Qu.: 72.984  
##                                        Max.   : 40.5116   Max.   :179.196

Tidy the Data

A few light transformations to make downstream operations straightforward.

# Convert the categorical columns to factors. Stations with no recorded
# equipment type get an explicit "Unknown" level so they are still counted
# rather than silently treated as missing.
#
# fct_na_value_to_level() is used instead of fct_explicit_na(), which is
# deprecated as of forcats 1.0.0 and emits a deprecation warning.
#
# NOTE(review): equipment_type labels are otherwise kept exactly as recorded.
# The rendered counts show variants such as "Tier1", "Tier 1" and a "Tier1"
# with trailing whitespace; consider normalising them (e.g. with
# stringr::str_squish()) before counting.
solar_tidy <- solar |>
  mutate(
    country = as.factor(country),
    equipment_type = fct_na_value_to_level(
      as.factor(equipment_type),
      level = "Unknown"
    ),
    time_zone = as.factor(time_zone)
  )

# Basic completeness check for key fields: total row count plus the number of
# missing values remaining in the two columns used by the insights below.
solar_tidy |>
  summarize(
    n_rows = n(),
    missing_country = sum(is.na(country)),
    # Expected to be 0 because NA was turned into the "Unknown" level above.
    missing_equipment_type = sum(is.na(equipment_type))
  )
## # A tibble: 1 × 3
##   n_rows missing_country missing_equipment_type
##    <int>           <int>                  <int>
## 1     97               0                      0

Insight 1: Where are stations concentrated?

We count stations by country and highlight the top 10.

# Number of stations per country, largest first. `sort = TRUE` orders the
# result by descending count.
country_counts <- count(solar_tidy, country, name = "n_stations", sort = TRUE)

# Show top 10 as a small table
top_ten_table <- slice_head(country_counts, n = 10)
knitr::kable(
  top_ten_table,
  caption = "Top 10 Countries by Number of Solar Stations"
)
Top 10 Countries by Number of Solar Stations
country n_stations
Pakistan 9
Zambia 9
Mali 5
Nepal 5
Senegal 5
Tanzania 5
Vietnam 5
Armenia 4
Burkina Faso 4
Maldives 4

Visualization: Top Countries by Station Count

# Keep the ten largest countries and order the factor levels by station count
# so the bars are drawn in count order rather than alphabetically.
top_countries <- country_counts |>
  slice_head(n = 10) |>
  mutate(country = fct_reorder(.f = country, .x = n_stations))

# Horizontal bar chart: countries are mapped to x and then flipped, which
# keeps the long country names readable on the vertical axis.
top_countries |>
  ggplot(mapping = aes(x = country, y = n_stations)) +
  geom_col() +
  coord_flip() +
  theme_minimal(base_size = 12) +
  labs(
    title = "Top 10 Countries by Number of Solar Measurement Stations",
    x = "Country",
    y = "Number of Stations",
    caption = "Source: Global Solar Stations Inventory (Sept 2023)"
  )

Insight 2: What equipment types are most common?

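We summarize the most frequently listed equipment types across all stations. Labels are counted exactly as recorded in the inventory, so spelling variants of the same tier (for example "Tier1" and "Tier 1") appear as separate rows, and stations with no recorded equipment type are grouped as "Unknown".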

# Number of stations per equipment type label, most frequent first. Labels
# are counted as they appear in solar_tidy, including the "Unknown" level.
equipment_counts <- count(
  solar_tidy,
  equipment_type,
  name = "n_stations",
  sort = TRUE
)

# Render the ten most frequent labels as a table.
top_equipment_table <- slice_head(equipment_counts, n = 10)
knitr::kable(
  top_equipment_table,
  caption = "Most Common Equipment Types (Top 10)"
)
Most Common Equipment Types (Top 10)
equipment_type n_stations
Tier2 48
Tier1 30
Tier 1 5
Unknown 4
Helioscale omega station (Tier 1) 3
Tier 2 station with Rotating Shadowband Radiometer, Silicon (LI-COR) and Thermopile Pyranometer 3
Helioscale omega station (Tier 2) 1
Helioscale phi station (Tier 1) 1
Helioscale phi station (Tier 2) 1
Tier1  1