Dane wejściowe mają format pliku xlsx i zawierają 8 arkuszy: arkusz z monitoring sites coordinates oraz 7 arkuszy nazwanych od nazwy zmiennej zależnej, zawierających wartości kolejnych zmiennych objaśniających w kolumnach.

KROK 1: wczytanie danych i opis wizualny

Sekcja skopiowana bezpośrednio z markdown, którym realizowałam zajęcia z ekonometrii przestrzennej; służy wizualnemu opisowi danych.

Pakiety

pkgs = c(
  "readxl",
  "dplyr",
  "stringr",
  "ggplot2",
  "knitr",
  "sf",
  "rnaturalearth"
)
invisible(lapply(pkgs, library, character.only = TRUE))

Ustawienia kolorów i motywu wykresów

# kolorki
COL_BG      = "#F7F7FB"
COL_DARK    = "#2E3440"
COL_GREY    = "#D8DEE9"
COL_PURPLE_DARK = "#3B0F70"
COL_PURPLE      = "#8C2981"
COL_PINK        = "#DE4968"
COL_PEACH       = "#FE9F6D"
COL_YELLOW      = "#FCFDBF"
PAL_CONT = c(
  COL_PURPLE_DARK,
  COL_PURPLE,
  COL_PINK,
  COL_PEACH,
  COL_YELLOW
)
SCALE_CONT_FILL = scale_fill_gradientn(colours = PAL_CONT, name = NULL)
SCALE_CONT_COL  = scale_color_gradientn(colours = PAL_CONT, name = NULL)
pal_discrete = function(n) {
  base = c(COL_PURPLE_DARK, COL_PURPLE, COL_PINK, COL_PEACH, COL_YELLOW)
  if (n <= length(base)) return(base[1:n])
  grDevices::colorRampPalette(base)(n)
}
theme_spojny = theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", color = COL_DARK),
    plot.subtitle = element_text(color = COL_DARK),
    legend.title = element_text(face = "bold", color = COL_DARK),
    legend.text  = element_text(color = COL_DARK),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey92"),
    strip.text = element_text(face = "bold", color = COL_DARK)
  )

Wczytanie arkuszy

PATH = "C:/Users/majed/OneDrive/Pulpit/smieci.xlsx"
sheet_coords = "Site coordinates"
sheets_all   = readxl::excel_sheets(PATH)
sheets_cat   = sheets_all[sheets_all != sheet_coords]

coords = readxl::read_excel(PATH, sheet = sheet_coords) %>%
  dplyr::rename(site_id = `Monitoring site ID`) %>%
  dplyr::mutate(
    site_id = as.integer(site_id),
    lat = as.numeric(lat),
    lon = as.numeric(lon)
  )

litter_list = list()
for (sh in sheets_cat) {
  df = readxl::read_excel(PATH, sheet = sh)
  y_col = names(df)[stringr::str_detect(names(df), "^Y_")]
  
  df2 = df %>%
    dplyr::rename(site_id = `Monitoring site ID`, y = dplyr::all_of(y_col)) %>%
    dplyr::mutate(
      site_id = as.integer(site_id),
      y = as.numeric(y),
      category_sheet = sh,
      category = sh %>%
        stringr::str_remove("^\\d+_") %>%
        stringr::str_replace_all("_", " ") %>%
        stringr::str_trim()
    ) %>%
    dplyr::left_join(coords, by = "site_id")
  
  litter_list[[sh]] = df2
}
litter_long = dplyr::bind_rows(litter_list)

Statystyki opisowe

tab_summary = litter_long %>%
  dplyr::group_by(category) %>%
  dplyr::summarise(
    n_sites   = sum(!is.na(y)),
    total     = sum(y, na.rm = TRUE),
    mean      = mean(y, na.rm = TRUE),
    median    = median(y, na.rm = TRUE),
    sd        = sd(y, na.rm = TRUE),
    min       = min(y, na.rm = TRUE),
    max       = max(y, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::arrange(desc(mean))


knitr::kable(
  tab_summary,
  digits = 2,
  col.names = c("Typ śmieci", "Liczba obszarów monitoringu", "Suma",
                "Średnia", "Mediana", "SD", "Min", "Max"),
  caption = "Statystyki opisowe"
)
Statystyki opisowe
Typ śmieci Liczba obszarów monitoringu Suma Średnia Mediana SD Min Max
Cigarette butts 44 4783 108.70 67.5 114.62 2 514
Plastic ropes 44 588 13.36 8.5 14.53 1 70
Styrofoam and polystyrene 44 506 11.50 6.0 16.81 0 103
Plastic bags 44 450 10.23 7.0 9.35 0 36
Plastic bottle caps 44 351 7.98 4.0 11.85 0 66
SUP dishes and cutlery 44 170 3.86 3.0 4.04 0 19
Beverage containers 44 66 1.50 1.0 1.77 0 7

Rozkład liczby śmieci

ggplot(litter_long, aes(x = log1p(y))) +
  geom_histogram(
    bins = 25,
    fill = "grey",
    color = "white",
    linewidth = 0.2,
    alpha = 0.9
  ) +
  facet_wrap(~ category, scales = "free_y", ncol = 3) +
  labs(
    title = "Dystrybucja przestrzenna rozkładu śmieci",
    x = "log(1 + y)",
    y = "Liczba obszarów monitoringu"
  ) +
  theme_spojny +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "white", color = NA)
  )

Rozmieszczenie przestrzenne

latvia_sf_4326 = rnaturalearth::ne_countries(
  country = "Latvia", scale = "medium", returnclass = "sf"
) %>% sf::st_make_valid() %>% sf::st_transform(4326)
litter_sf_4326 = litter_long %>%
  dplyr::filter(!is.na(lat), !is.na(lon)) %>%
  sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
ggplot() +
  geom_sf(
    data = latvia_sf_4326,
    fill = COL_BG,
    color = COL_DARK,
    linewidth = 0.35
  ) +
  geom_sf(
    data = litter_sf_4326,
    aes(size = y, color = y),
    alpha = 0.8
  ) +
  facet_wrap(~ category) +
  scale_size_continuous(
    name = "Liczba jednostek",
    range = c(1.2, 6)
  ) +
  scale_color_gradientn(
    colours = PAL_CONT,
    name = "Liczba jednostek",
    trans = "sqrt"
  ) +
  labs(
    title = "Rozmieszczenie przestrzenne obszarów monitoringu i jednostek śmieci",
    subtitle = "Rozmiar i kolor punktu odzwierciedlają liczbę obserwacji"
  ) +
  theme_spojny +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    legend.position = "bottom"
  )

KROK 2: kod właściwy (modele)

Estymowane modele to kandydaci: Poisson, ujemny dwumianowy (NB) oraz ich zero-inflated odpowiedniki (dla niektórych typów śmieci udział obserwacji zerowych wynosi około 30%).

Pakiety

library("readxl")
library("MASS")
library("pscl")
library("stargazer")
library("lmtest")
library("systemfit")

Przygotowanie danych

Każda kolumna ze zmienną objaśniającą ma nazewnictwo na wzór X1_"nazwa zmiennej". Dla wygody kodowania modeli skracam nazwy do X1X14, a pełne etykiety wrzucam na wydrukach

# etykietki dla kolumn - lekko skracam nazwy zeby miescily sie na wydrukach
nazwy_litter = c("Cigarette buds","Plastic bags","Bottle caps",
                 "Beverage cont.","SUP","Plastic ropes","Styrofoam")

litter_data = list()
for (i in 1:7) {
  dane = as.data.frame(litter_list[[i]])[, 2:16]
  names(dane) = c("y","X1","X2","X3","X4","X5","X6","X7","X8","X9",
                  "X10","X11","X12","X13","X14")
  dane = na.omit(dane)
  dane$lnX12 = log(dane$X12)    # liczba mieszkancow - logarytm
  dane$lnX13 = log(dane$X13)    # noclegi turystyczne - logarytm
  litter_data[[i]] = dane
}

# etykietki pod wydruki
etykiety = c("BathingZoneManag","BlueFlag","GarbageBin","BeachCleanup",
             "BeachCafe","CoastCafe","Accommod","Parking","Shops",
             "PublTransport","BeachUseCateg","ln(LocalResidents)",
             "ln(TouristNights2024)","LVSeaBasin")

# dane po arkuszach i typach smieci
d1 = litter_data[[1]]   # Cigarette butts
d2 = litter_data[[2]]   # Plastic bags
d3 = litter_data[[3]]   # Plastic bottle caps
d4 = litter_data[[4]]   # Beverage containers
d5 = litter_data[[5]]   # SUP dishes and cutlery
d6 = litter_data[[6]]   # Plastic ropes
d7 = litter_data[[7]]   # Styrofoam and polystyrene

Model Poissona

pois1 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d1)
pois2 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d2)
pois3 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d3)
pois4 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d4)
pois5 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d5)
pois6 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d6)
pois7 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d7)

stargazer(pois1, pois2, pois3, pois4, pois5, pois6, pois7,
          type="html", column.labels=nazwy_litter, model.numbers=FALSE,
          covariate.labels=etykiety)
Dependent variable:
y
Cigarette buds Plastic bags Bottle caps Beverage cont. SUP Plastic ropes Styrofoam
BathingZoneManag 0.384*** -0.307* -0.349* 0.646 0.789*** -0.300* -0.516***
(0.060) (0.186) (0.204) (0.430) (0.264) (0.160) (0.186)
BlueFlag -1.147*** -1.290*** -1.514*** -1.302* -0.095 -0.240 0.215
(0.075) (0.257) (0.293) (0.787) (0.350) (0.246) (0.258)
GarbageBin 1.477*** 1.003*** 1.696*** -0.904* 0.745*** 0.198 -0.272
(0.055) (0.167) (0.195) (0.467) (0.270) (0.127) (0.175)
BeachCleanup -0.110* -0.239 0.467** -0.012 -0.412* -0.384*** 0.576***
(0.056) (0.157) (0.186) (0.319) (0.233) (0.104) (0.125)
BeachCafe 0.744*** 1.345*** 1.013*** 1.429* 0.502* 1.124*** 1.058***
(0.055) (0.207) (0.265) (0.793) (0.304) (0.233) (0.252)
CoastCafe 0.033 -0.590*** -0.773*** -0.664* 0.399 -0.106 0.575***
(0.053) (0.158) (0.204) (0.391) (0.246) (0.129) (0.162)
Accommod -0.204*** -0.537*** -0.600*** 1.410*** -0.001 0.411*** -0.874***
(0.051) (0.147) (0.172) (0.497) (0.247) (0.126) (0.137)
Parking 0.357*** 0.544** -0.226 -0.302 0.896** 0.822*** -0.671***
(0.076) (0.220) (0.216) (0.589) (0.385) (0.187) (0.208)
Shops -0.177*** -0.184 0.441** -0.769** 0.447** -0.857*** 0.644***
(0.040) (0.136) (0.174) (0.380) (0.227) (0.119) (0.137)
PublTransport 0.486*** 0.435*** 0.483** 0.642 -0.530* 0.164 -0.413***
(0.052) (0.166) (0.211) (0.435) (0.286) (0.137) (0.159)
BeachUseCateg -0.308*** 0.541*** -0.271 0.693 -1.175*** 0.218 0.541**
(0.069) (0.195) (0.211) (0.578) (0.323) (0.173) (0.211)
ln(LocalResidents) 0.185*** -0.034 -0.182** -0.079 0.256** 0.073 -0.482***
(0.025) (0.073) (0.089) (0.199) (0.125) (0.061) (0.071)
ln(TouristNights2024) -0.087** -0.006 0.109 -0.783** 0.258 -0.294*** 0.019
(0.039) (0.117) (0.143) (0.378) (0.182) (0.103) (0.116)
LVSeaBasin 0.061 -0.277** -1.022*** -0.049 -0.138 0.209* 0.092
(0.044) (0.130) (0.159) (0.308) (0.192) (0.108) (0.126)
Constant 3.403*** 1.972* 3.255** 7.784** -1.815 4.316*** 4.511***
(0.332) (1.073) (1.360) (3.275) (1.618) (0.933) (1.076)
Observations 44 44 44 44 44 44 44
Log Likelihood -639.646 -152.947 -160.862 -66.099 -102.788 -241.218 -220.566
Akaike Inf. Crit. 1,309.292 335.894 351.725 162.197 235.576 512.437 471.133
Note: p<0.1; p<0.05; p<0.01

Model ujemny dwumianowy (NB)

nb1 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d1)
nb2 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d2)
nb3 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d3)
nb4 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d4)
nb5 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d5)
nb6 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d6)
nb7 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d7)

stargazer(nb1, nb2, nb3, nb4, nb5, nb6, nb7,
          type="html", column.labels=nazwy_litter, model.numbers=FALSE,
          covariate.labels=etykiety)
Dependent variable:
y
Cigarette buds Plastic bags Bottle caps Beverage cont. SUP Plastic ropes Styrofoam
BathingZoneManag 0.565* -0.050 0.264 0.651 0.759** -0.016 -0.415
(0.298) (0.331) (0.418) (0.510) (0.376) (0.364) (0.371)
BlueFlag -1.131*** -1.278** -1.694** -1.317 -0.434 -0.288 -0.008
(0.429) (0.521) (0.665) (0.887) (0.545) (0.537) (0.538)
GarbageBin 1.379*** 1.063*** 1.563*** -0.923* 0.691* 0.122 -0.427
(0.282) (0.313) (0.401) (0.542) (0.368) (0.337) (0.356)
BeachCleanup -0.071 -0.355 -0.100 -0.030 -0.420 -0.451* 0.405
(0.230) (0.270) (0.350) (0.380) (0.310) (0.269) (0.273)
BeachCafe 0.779* 1.293*** 0.978 1.488 0.636 0.960* 1.025*
(0.438) (0.482) (0.617) (0.907) (0.514) (0.546) (0.549)
CoastCafe -0.041 -0.430 -0.561 -0.614 0.241 0.138 0.469
(0.263) (0.285) (0.375) (0.461) (0.339) (0.317) (0.321)
Accommod -0.150 -0.647** -0.469 1.356** 0.088 0.525 -1.158***
(0.267) (0.288) (0.355) (0.558) (0.342) (0.323) (0.320)
Parking 0.492 0.581 -0.055 -0.450 0.812 0.891** -0.323
(0.372) (0.405) (0.489) (0.687) (0.501) (0.450) (0.461)
Shops -0.376 -0.012 0.308 -0.795* 0.374 -0.712** 0.212
(0.262) (0.277) (0.363) (0.472) (0.329) (0.316) (0.323)
PublTransport 0.394 0.480 0.275 0.688 -0.512 -0.018 -0.144
(0.298) (0.327) (0.427) (0.530) (0.395) (0.355) (0.358)
BeachUseCateg -0.364 0.324 -0.240 0.741 -1.038** -0.048 0.855*
(0.356) (0.385) (0.484) (0.674) (0.458) (0.428) (0.443)
ln(LocalResidents) 0.221* -0.002 0.096 -0.112 0.294* 0.229 -0.424***
(0.129) (0.139) (0.177) (0.232) (0.171) (0.155) (0.155)
ln(TouristNights2024) 0.014 -0.053 -0.147 -0.756* 0.190 -0.506** 0.024
(0.202) (0.220) (0.279) (0.423) (0.257) (0.246) (0.243)
LVSeaBasin 0.422* -0.208 -1.119*** -0.065 -0.001 0.093 -0.242
(0.217) (0.246) (0.320) (0.370) (0.278) (0.262) (0.267)
Constant 1.384 2.576 4.760* 7.838** -1.671 6.015*** 4.243*
(1.851) (2.024) (2.600) (3.747) (2.330) (2.256) (2.270)
Observations 44 44 44 44 44 44 44
Log Likelihood -223.086 -131.796 -120.435 -65.871 -97.892 -149.346 -137.706
theta 2.901*** (0.648) 3.426*** (1.116) 2.078*** (0.624) 3.890 (3.269) 3.594** (1.635) 2.299*** (0.567) 2.421*** (0.609)
Akaike Inf. Crit. 476.173 293.592 270.870 161.743 225.783 328.692 305.412
Note: p<0.1; p<0.05; p<0.01

Testowanie: Poisson vs NB

H0: brak naddyspersji. Odrzucenie H0 -> wygrywa NB.

modele_nb   = list(nb1, nb2, nb3, nb4, nb5, nb6, nb7)
modele_pois = list(pois1, pois2, pois3, pois4, pois5, pois6, pois7)

lr_tab = data.frame()
for (i in 1:7) {
  test = lrtest(modele_nb[[i]], modele_pois[[i]])
  p = test[["Pr(>Chisq)"]][2]
  lr_tab = rbind(lr_tab, data.frame(
    litter      = nazwy_litter[i],
    logLik_NB   = round(test[["LogLik"]][1], 3),
    logLik_Pois = round(test[["LogLik"]][2], 3),
    Chisq       = round(test[["Chisq"]][2], 3),
    df          = abs(test[["Df"]][2]),
    p_value     = format.pval(p, digits = 3, eps = 1e-16),
    result       = ifelse(p < 0.05, "NB", "Poisson")
  ))
}

knitr::kable(
  lr_tab,
  col.names = c("Typ śmieci", "logLik NB", "logLik Poisson",
                "Chi-sq.", "df", "p-val", ""),
  caption = "Test ilorazu wiarygodności: Poisson vs NB"
)
Test ilorazu wiarygodności: Poisson vs NB
Typ śmieci logLik NB logLik Poisson Chi-sq. df p-val
Cigarette buds -222.086 -639.646 835.120 1 <1e-16 NB
Plastic bags -130.796 -152.947 44.302 1 2.81e-11 NB
Bottle caps -119.435 -160.862 82.855 1 <1e-16 NB
Beverage cont. -64.871 -66.099 2.454 1 0.117 Poisson
SUP -96.892 -102.788 11.793 1 0.000595 NB
Plastic ropes -148.346 -241.218 185.745 1 <1e-16 NB
Styrofoam -136.706 -220.566 167.721 1 <1e-16 NB

Dla wszystkich zmiennych zależnych z wyjątkiem Beverage containers wygrywa model ujemny dwumianowy. Beverage containers jest dość specyficzny — aż 39% obserwacji zerowych.

Czy modele zero-inflated mają sens?

  1. Zero test (H0: Poisson; H1: ZIP) (https://search.r-project.org/CRAN/refmans/vcdExtra/html/zero.test.html)
library(vcdExtra) 

zt_tab = data.frame()
for (i in 1:7) {
  invisible(capture.output(zt <- zero.test(litter_data[[i]]$y)))
  zt_tab = rbind(zt_tab, data.frame(
    litter      = nazwy_litter[i],
    chi2        = round(zt$statistic, 2),
    df          = zt$df,
    p_value     = zt$prob
  ))
}

knitr::kable(
  zt_tab,
  col.names = c("Typ śmieci", "Chi-sq.", "df", "p-value"),
  caption = "Test van den Broeka: zero inflation względem rozkładu Poissona"
)
Test van den Broeka: zero inflation względem rozkładu Poissona
Typ śmieci Chi-sq. df p-value
Cigarette buds 0.00 1 1.0000000
Plastic bags 5651.37 1 0.0000000
Bottle caps 2379.51 1 0.0000000
Beverage cont. 11.88 1 0.0005665
SUP 20.04 1 0.0000076
Plastic ropes 0.00 1 0.9933658
Styrofoam 2241.82 1 0.0000000

Dla wszystkich typów śmieci z wyjątkiem Cigarette buds i plastic ropes zero-inflated jest wskazany ale - ogromne wartości chi-sq dla plastic bags i bottle caps, a mała dla beverage cont (która ma największy udział zer) czemu? wywindowane chi-sq dotyczy typów śmieci charakteryzowanych przez wysoką średnią, to jest raczej efekt naddyspersji wykazanej przez lrtest

  1. Porównanie zer obserwowanych i oczekiwanych wg Poissona (https://r-statistics.co/Zero-Inflated-Distributions-in-R.html#:~:text=The%20data%20has%20308%20zeros%2C%20but%20a%20plain,hurdle%20model%20rather%20than%20a%20vanilla%20Poisson%20fit.)
zi_test = data.frame()
for (i in 1:7) {
  dane_test = litter_data[[i]]
  pois = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14,
             family=poisson, data=dane_test)
  zera_obs = sum(dane_test$y == 0)            # ile zer jest
  zera_exp = sum(dpois(0, fitted(pois)))      # zera oczekiwane

  zi_test = rbind(zi_test, data.frame(
    litter   = nazwy_litter[i],
    zera_obs = round(zera_obs,3),
    proc_zer = round(100 * zera_obs / nrow(dane_test),3),
    zera_exp = round(zera_exp, 3),
    diff = round(zera_obs - zera_exp,3)
  ))
}

knitr::kable(
  zi_test,
  col.names = c("Typ śmieci", "Zera obserwowalne", "Udział zer", "Zera oczekiwane", "Różnica"),
  caption = ""
)
Typ śmieci Zera obserwowalne Udział zer Zera oczekiwane Różnica
Cigarette buds 0 0.000 0.000 0.000
Plastic bags 3 6.818 0.939 2.061
Bottle caps 6 13.636 3.164 2.836
Beverage cont. 17 38.636 14.413 2.587
SUP 5 11.364 3.939 1.061
Plastic ropes 0 0.000 0.056 -0.056
Styrofoam 1 2.273 0.420 0.580

Generalnie żaden z typów śmieci nie wskazuje na bezwzględną konieczność uwzględnienia modeli zero-inflated, ale dla Plastic bags, Beverage containers, Bottle caps i Beverage containers difference przyjmuje największe wartości - dla tych trzech wykonuję modele ZI jako robustness.

zero-inflated Poisson (ZIP)

nazwy_litter_2 = c("Plastic bags", "Bottle caps","Beverage cont.")

zip2 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
                  X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d2, dist="poisson")
zip3 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
                  X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d3, dist="poisson")
zip4 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
                  X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d4, dist="poisson")

stargazer(zip2, zip3, zip4,
          type="html", column.labels=nazwy_litter_2, model.numbers=FALSE,
          covariate.labels=etykiety)
Dependent variable:
y
Plastic bags Bottle caps Beverage cont.
BathingZoneManag -0.163 -0.449 0.594
BlueFlag -1.353 -1.476 -0.623
GarbageBin 0.930 1.535 0.141
BeachCleanup -0.372 0.447 -0.588
BeachCafe 1.289 0.970 1.613
CoastCafe -0.444 -0.699 0.028
Accommod -0.417 -0.542 -0.084
Parking 0.539 -0.128 0.916
Shops -0.190 0.373 -0.612
PublTransport 0.326 0.552 -0.390
BeachUseCateg 0.552 -0.162 -0.121
ln(LocalResidents) 0.033 -0.222 -0.176
ln(TouristNights2024) -0.111 0.085 -0.615
LVSeaBasin -0.315 -0.967 0.290
Constant 2.847 3.647 9.305
Observations 44 44 44
Log Likelihood -143.598 -145.721 -48.227
Note: p<0.1; p<0.05; p<0.01

zero-inflated NB (ZINB)

zinb2 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
                   X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d2, dist="negbin")
zinb3 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
                   X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d3, dist="negbin")
zinb4 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
                   X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d4, dist="negbin")

stargazer(zinb2, zinb3, zinb4,
          type="html", column.labels=nazwy_litter_2, model.numbers=FALSE,
          covariate.labels=etykiety)
Dependent variable:
y
Plastic bags Bottle caps Beverage cont.
BathingZoneManag 0.046 -0.011 0.707
BlueFlag -1.115 -1.511 -0.898
GarbageBin 0.973 1.530 -0.008
BeachCleanup -0.515 -0.226 -0.326
BeachCafe 1.129 0.683 1.841
CoastCafe -0.266 -0.280 -0.347
Accommod -0.453 -0.347 0.003
Parking 0.593 -0.050 1.092
Shops -0.049 0.310 -0.704
PublTransport 0.287 0.288 -0.039
BeachUseCateg 0.278 0.071 -0.481
ln(LocalResidents) 0.088 -0.016 -0.078
ln(TouristNights2024) -0.126 -0.135 -0.726
LVSeaBasin -0.197 -1.042 0.136
Constant 3.017 5.077 10.056
Observations 44 44 44
Log Likelihood -124.203 -109.654 -47.089
Note: p<0.1; p<0.05; p<0.01

Wyniki modeli ZI nie są nigdzie istotne.