Dane wejściowe mają format pliku xlsx i zawierają 8 arkuszy: arkusz z monitoring sites coordinates oraz 7 arkuszy nazwanych od nazwy zmiennej zależnej, zawierających wartości kolejnych zmiennych objaśniających w kolumnach.
Sekcja skopiowana bezpośrednio z markdown, którym realizowałam zajęcia z ekonometrii przestrzennej; służy wizualnemu opisowi danych.
pkgs = c(
"readxl",
"dplyr",
"stringr",
"ggplot2",
"knitr",
"sf",
"rnaturalearth"
)
invisible(lapply(pkgs, library, character.only = TRUE))
# kolorki
COL_BG = "#F7F7FB"
COL_DARK = "#2E3440"
COL_GREY = "#D8DEE9"
COL_PURPLE_DARK = "#3B0F70"
COL_PURPLE = "#8C2981"
COL_PINK = "#DE4968"
COL_PEACH = "#FE9F6D"
COL_YELLOW = "#FCFDBF"
PAL_CONT = c(
COL_PURPLE_DARK,
COL_PURPLE,
COL_PINK,
COL_PEACH,
COL_YELLOW
)
SCALE_CONT_FILL = scale_fill_gradientn(colours = PAL_CONT, name = NULL)
SCALE_CONT_COL = scale_color_gradientn(colours = PAL_CONT, name = NULL)
pal_discrete = function(n) {
base = c(COL_PURPLE_DARK, COL_PURPLE, COL_PINK, COL_PEACH, COL_YELLOW)
if (n <= length(base)) return(base[1:n])
grDevices::colorRampPalette(base)(n)
}
theme_spojny = theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", color = COL_DARK),
plot.subtitle = element_text(color = COL_DARK),
legend.title = element_text(face = "bold", color = COL_DARK),
legend.text = element_text(color = COL_DARK),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "grey92"),
strip.text = element_text(face = "bold", color = COL_DARK)
)
PATH = "C:/Users/majed/OneDrive/Pulpit/smieci.xlsx"
sheet_coords = "Site coordinates"
sheets_all = readxl::excel_sheets(PATH)
sheets_cat = sheets_all[sheets_all != sheet_coords]
coords = readxl::read_excel(PATH, sheet = sheet_coords) %>%
dplyr::rename(site_id = `Monitoring site ID`) %>%
dplyr::mutate(
site_id = as.integer(site_id),
lat = as.numeric(lat),
lon = as.numeric(lon)
)
litter_list = list()
for (sh in sheets_cat) {
df = readxl::read_excel(PATH, sheet = sh)
y_col = names(df)[stringr::str_detect(names(df), "^Y_")]
df2 = df %>%
dplyr::rename(site_id = `Monitoring site ID`, y = dplyr::all_of(y_col)) %>%
dplyr::mutate(
site_id = as.integer(site_id),
y = as.numeric(y),
category_sheet = sh,
category = sh %>%
stringr::str_remove("^\\d+_") %>%
stringr::str_replace_all("_", " ") %>%
stringr::str_trim()
) %>%
dplyr::left_join(coords, by = "site_id")
litter_list[[sh]] = df2
}
litter_long = dplyr::bind_rows(litter_list)
tab_summary = litter_long %>%
dplyr::group_by(category) %>%
dplyr::summarise(
n_sites = sum(!is.na(y)),
total = sum(y, na.rm = TRUE),
mean = mean(y, na.rm = TRUE),
median = median(y, na.rm = TRUE),
sd = sd(y, na.rm = TRUE),
min = min(y, na.rm = TRUE),
max = max(y, na.rm = TRUE),
.groups = "drop"
) %>%
dplyr::arrange(desc(mean))
knitr::kable(
tab_summary,
digits = 2,
col.names = c("Typ śmieci", "Liczba obszarów monitoringu", "Suma",
"Średnia", "Mediana", "SD", "Min", "Max"),
caption = "Statystyki opisowe"
)
| Typ śmieci | Liczba obszarów monitoringu | Suma | Średnia | Mediana | SD | Min | Max |
|---|---|---|---|---|---|---|---|
| Cigarette butts | 44 | 4783 | 108.70 | 67.5 | 114.62 | 2 | 514 |
| Plastic ropes | 44 | 588 | 13.36 | 8.5 | 14.53 | 1 | 70 |
| Styrofoam and polystyrene | 44 | 506 | 11.50 | 6.0 | 16.81 | 0 | 103 |
| Plastic bags | 44 | 450 | 10.23 | 7.0 | 9.35 | 0 | 36 |
| Plastic bottle caps | 44 | 351 | 7.98 | 4.0 | 11.85 | 0 | 66 |
| SUP dishes and cutlery | 44 | 170 | 3.86 | 3.0 | 4.04 | 0 | 19 |
| Beverage containers | 44 | 66 | 1.50 | 1.0 | 1.77 | 0 | 7 |
ggplot(litter_long, aes(x = log1p(y))) +
geom_histogram(
bins = 25,
fill = "grey",
color = "white",
linewidth = 0.2,
alpha = 0.9
) +
facet_wrap(~ category, scales = "free_y", ncol = 3) +
labs(
title = "Dystrybucja przestrzenna rozkładu śmieci",
x = "log(1 + y)",
y = "Liczba obszarów monitoringu"
) +
theme_spojny +
theme(
legend.position = "none",
strip.background = element_rect(fill = "white", color = NA)
)
latvia_sf_4326 = rnaturalearth::ne_countries(
country = "Latvia", scale = "medium", returnclass = "sf"
) %>% sf::st_make_valid() %>% sf::st_transform(4326)
litter_sf_4326 = litter_long %>%
dplyr::filter(!is.na(lat), !is.na(lon)) %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
ggplot() +
geom_sf(
data = latvia_sf_4326,
fill = COL_BG,
color = COL_DARK,
linewidth = 0.35
) +
geom_sf(
data = litter_sf_4326,
aes(size = y, color = y),
alpha = 0.8
) +
facet_wrap(~ category) +
scale_size_continuous(
name = "Liczba jednostek",
range = c(1.2, 6)
) +
scale_color_gradientn(
colours = PAL_CONT,
name = "Liczba jednostek",
trans = "sqrt"
) +
labs(
title = "Rozmieszczenie przestrzenne obszarów monitoringu i jednostek śmieci",
subtitle = "Rozmiar i kolor punktu odzwierciedlają liczbę obserwacji"
) +
theme_spojny +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
legend.position = "bottom"
)
Estymowane modele to kandydaci: Poisson, ujemny dwumianowy (NB) oraz ich zero-inflated odpowiedniki (dla niektórych typów śmieci udział obserwacji zerowych wynosi około 30%).
library("readxl")
library("MASS")
library("pscl")
library("stargazer")
library("lmtest")
library("systemfit")
Każda kolumna ze zmienną objaśniającą ma nazewnictwo na wzór
X1_"nazwa zmiennej". Dla wygody kodowania modeli skracam
nazwy do X1–X14, a pełne etykiety wrzucam na
wydrukach
# etykietki dla kolumn - lekko skracam nazwy zeby miescily sie na wydrukach
nazwy_litter = c("Cigarette buds","Plastic bags","Bottle caps",
"Beverage cont.","SUP","Plastic ropes","Styrofoam")
litter_data = list()
for (i in 1:7) {
dane = as.data.frame(litter_list[[i]])[, 2:16]
names(dane) = c("y","X1","X2","X3","X4","X5","X6","X7","X8","X9",
"X10","X11","X12","X13","X14")
dane = na.omit(dane)
dane$lnX12 = log(dane$X12) # liczba mieszkancow - logarytm
dane$lnX13 = log(dane$X13) # noclegi turystyczne - logarytm
litter_data[[i]] = dane
}
# etykietki pod wydruki
etykiety = c("BathingZoneManag","BlueFlag","GarbageBin","BeachCleanup",
"BeachCafe","CoastCafe","Accommod","Parking","Shops",
"PublTransport","BeachUseCateg","ln(LocalResidents)",
"ln(TouristNights2024)","LVSeaBasin")
# dane po arkuszach i typach smieci
d1 = litter_data[[1]] # Cigarette butts
d2 = litter_data[[2]] # Plastic bags
d3 = litter_data[[3]] # Plastic bottle caps
d4 = litter_data[[4]] # Beverage containers
d5 = litter_data[[5]] # SUP dishes and cutlery
d6 = litter_data[[6]] # Plastic ropes
d7 = litter_data[[7]] # Styrofoam and polystyrene
pois1 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d1)
pois2 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d2)
pois3 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d3)
pois4 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d4)
pois5 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d5)
pois6 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d6)
pois7 = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, family=poisson, data=d7)
stargazer(pois1, pois2, pois3, pois4, pois5, pois6, pois7,
type="html", column.labels=nazwy_litter, model.numbers=FALSE,
covariate.labels=etykiety)
| Dependent variable: | |||||||
| y | |||||||
| Cigarette buds | Plastic bags | Bottle caps | Beverage cont. | SUP | Plastic ropes | Styrofoam | |
| BathingZoneManag | 0.384*** | -0.307* | -0.349* | 0.646 | 0.789*** | -0.300* | -0.516*** |
| (0.060) | (0.186) | (0.204) | (0.430) | (0.264) | (0.160) | (0.186) | |
| BlueFlag | -1.147*** | -1.290*** | -1.514*** | -1.302* | -0.095 | -0.240 | 0.215 |
| (0.075) | (0.257) | (0.293) | (0.787) | (0.350) | (0.246) | (0.258) | |
| GarbageBin | 1.477*** | 1.003*** | 1.696*** | -0.904* | 0.745*** | 0.198 | -0.272 |
| (0.055) | (0.167) | (0.195) | (0.467) | (0.270) | (0.127) | (0.175) | |
| BeachCleanup | -0.110* | -0.239 | 0.467** | -0.012 | -0.412* | -0.384*** | 0.576*** |
| (0.056) | (0.157) | (0.186) | (0.319) | (0.233) | (0.104) | (0.125) | |
| BeachCafe | 0.744*** | 1.345*** | 1.013*** | 1.429* | 0.502* | 1.124*** | 1.058*** |
| (0.055) | (0.207) | (0.265) | (0.793) | (0.304) | (0.233) | (0.252) | |
| CoastCafe | 0.033 | -0.590*** | -0.773*** | -0.664* | 0.399 | -0.106 | 0.575*** |
| (0.053) | (0.158) | (0.204) | (0.391) | (0.246) | (0.129) | (0.162) | |
| Accommod | -0.204*** | -0.537*** | -0.600*** | 1.410*** | -0.001 | 0.411*** | -0.874*** |
| (0.051) | (0.147) | (0.172) | (0.497) | (0.247) | (0.126) | (0.137) | |
| Parking | 0.357*** | 0.544** | -0.226 | -0.302 | 0.896** | 0.822*** | -0.671*** |
| (0.076) | (0.220) | (0.216) | (0.589) | (0.385) | (0.187) | (0.208) | |
| Shops | -0.177*** | -0.184 | 0.441** | -0.769** | 0.447** | -0.857*** | 0.644*** |
| (0.040) | (0.136) | (0.174) | (0.380) | (0.227) | (0.119) | (0.137) | |
| PublTransport | 0.486*** | 0.435*** | 0.483** | 0.642 | -0.530* | 0.164 | -0.413*** |
| (0.052) | (0.166) | (0.211) | (0.435) | (0.286) | (0.137) | (0.159) | |
| BeachUseCateg | -0.308*** | 0.541*** | -0.271 | 0.693 | -1.175*** | 0.218 | 0.541** |
| (0.069) | (0.195) | (0.211) | (0.578) | (0.323) | (0.173) | (0.211) | |
| ln(LocalResidents) | 0.185*** | -0.034 | -0.182** | -0.079 | 0.256** | 0.073 | -0.482*** |
| (0.025) | (0.073) | (0.089) | (0.199) | (0.125) | (0.061) | (0.071) | |
| ln(TouristNights2024) | -0.087** | -0.006 | 0.109 | -0.783** | 0.258 | -0.294*** | 0.019 |
| (0.039) | (0.117) | (0.143) | (0.378) | (0.182) | (0.103) | (0.116) | |
| LVSeaBasin | 0.061 | -0.277** | -1.022*** | -0.049 | -0.138 | 0.209* | 0.092 |
| (0.044) | (0.130) | (0.159) | (0.308) | (0.192) | (0.108) | (0.126) | |
| Constant | 3.403*** | 1.972* | 3.255** | 7.784** | -1.815 | 4.316*** | 4.511*** |
| (0.332) | (1.073) | (1.360) | (3.275) | (1.618) | (0.933) | (1.076) | |
| Observations | 44 | 44 | 44 | 44 | 44 | 44 | 44 |
| Log Likelihood | -639.646 | -152.947 | -160.862 | -66.099 | -102.788 | -241.218 | -220.566 |
| Akaike Inf. Crit. | 1,309.292 | 335.894 | 351.725 | 162.197 | 235.576 | 512.437 | 471.133 |
| Note: | p<0.1; p<0.05; p<0.01 | ||||||
nb1 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d1)
nb2 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d2)
nb3 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d3)
nb4 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d4)
nb5 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d5)
nb6 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d6)
nb7 = glm.nb(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d7)
stargazer(nb1, nb2, nb3, nb4, nb5, nb6, nb7,
type="html", column.labels=nazwy_litter, model.numbers=FALSE,
covariate.labels=etykiety)
| Dependent variable: | |||||||
| y | |||||||
| Cigarette buds | Plastic bags | Bottle caps | Beverage cont. | SUP | Plastic ropes | Styrofoam | |
| BathingZoneManag | 0.565* | -0.050 | 0.264 | 0.651 | 0.759** | -0.016 | -0.415 |
| (0.298) | (0.331) | (0.418) | (0.510) | (0.376) | (0.364) | (0.371) | |
| BlueFlag | -1.131*** | -1.278** | -1.694** | -1.317 | -0.434 | -0.288 | -0.008 |
| (0.429) | (0.521) | (0.665) | (0.887) | (0.545) | (0.537) | (0.538) | |
| GarbageBin | 1.379*** | 1.063*** | 1.563*** | -0.923* | 0.691* | 0.122 | -0.427 |
| (0.282) | (0.313) | (0.401) | (0.542) | (0.368) | (0.337) | (0.356) | |
| BeachCleanup | -0.071 | -0.355 | -0.100 | -0.030 | -0.420 | -0.451* | 0.405 |
| (0.230) | (0.270) | (0.350) | (0.380) | (0.310) | (0.269) | (0.273) | |
| BeachCafe | 0.779* | 1.293*** | 0.978 | 1.488 | 0.636 | 0.960* | 1.025* |
| (0.438) | (0.482) | (0.617) | (0.907) | (0.514) | (0.546) | (0.549) | |
| CoastCafe | -0.041 | -0.430 | -0.561 | -0.614 | 0.241 | 0.138 | 0.469 |
| (0.263) | (0.285) | (0.375) | (0.461) | (0.339) | (0.317) | (0.321) | |
| Accommod | -0.150 | -0.647** | -0.469 | 1.356** | 0.088 | 0.525 | -1.158*** |
| (0.267) | (0.288) | (0.355) | (0.558) | (0.342) | (0.323) | (0.320) | |
| Parking | 0.492 | 0.581 | -0.055 | -0.450 | 0.812 | 0.891** | -0.323 |
| (0.372) | (0.405) | (0.489) | (0.687) | (0.501) | (0.450) | (0.461) | |
| Shops | -0.376 | -0.012 | 0.308 | -0.795* | 0.374 | -0.712** | 0.212 |
| (0.262) | (0.277) | (0.363) | (0.472) | (0.329) | (0.316) | (0.323) | |
| PublTransport | 0.394 | 0.480 | 0.275 | 0.688 | -0.512 | -0.018 | -0.144 |
| (0.298) | (0.327) | (0.427) | (0.530) | (0.395) | (0.355) | (0.358) | |
| BeachUseCateg | -0.364 | 0.324 | -0.240 | 0.741 | -1.038** | -0.048 | 0.855* |
| (0.356) | (0.385) | (0.484) | (0.674) | (0.458) | (0.428) | (0.443) | |
| ln(LocalResidents) | 0.221* | -0.002 | 0.096 | -0.112 | 0.294* | 0.229 | -0.424*** |
| (0.129) | (0.139) | (0.177) | (0.232) | (0.171) | (0.155) | (0.155) | |
| ln(TouristNights2024) | 0.014 | -0.053 | -0.147 | -0.756* | 0.190 | -0.506** | 0.024 |
| (0.202) | (0.220) | (0.279) | (0.423) | (0.257) | (0.246) | (0.243) | |
| LVSeaBasin | 0.422* | -0.208 | -1.119*** | -0.065 | -0.001 | 0.093 | -0.242 |
| (0.217) | (0.246) | (0.320) | (0.370) | (0.278) | (0.262) | (0.267) | |
| Constant | 1.384 | 2.576 | 4.760* | 7.838** | -1.671 | 6.015*** | 4.243* |
| (1.851) | (2.024) | (2.600) | (3.747) | (2.330) | (2.256) | (2.270) | |
| Observations | 44 | 44 | 44 | 44 | 44 | 44 | 44 |
| Log Likelihood | -223.086 | -131.796 | -120.435 | -65.871 | -97.892 | -149.346 | -137.706 |
| theta | 2.901*** (0.648) | 3.426*** (1.116) | 2.078*** (0.624) | 3.890 (3.269) | 3.594** (1.635) | 2.299*** (0.567) | 2.421*** (0.609) |
| Akaike Inf. Crit. | 476.173 | 293.592 | 270.870 | 161.743 | 225.783 | 328.692 | 305.412 |
| Note: | p<0.1; p<0.05; p<0.01 | ||||||
H0: brak naddyspersji. Odrzucenie H0 -> wygrywa NB.
modele_nb = list(nb1, nb2, nb3, nb4, nb5, nb6, nb7)
modele_pois = list(pois1, pois2, pois3, pois4, pois5, pois6, pois7)
lr_tab = data.frame()
for (i in 1:7) {
test = lrtest(modele_nb[[i]], modele_pois[[i]])
p = test[["Pr(>Chisq)"]][2]
lr_tab = rbind(lr_tab, data.frame(
litter = nazwy_litter[i],
logLik_NB = round(test[["LogLik"]][1], 3),
logLik_Pois = round(test[["LogLik"]][2], 3),
Chisq = round(test[["Chisq"]][2], 3),
df = abs(test[["Df"]][2]),
p_value = format.pval(p, digits = 3, eps = 1e-16),
result = ifelse(p < 0.05, "NB", "Poisson")
))
}
knitr::kable(
lr_tab,
col.names = c("Typ śmieci", "logLik NB", "logLik Poisson",
"Chi-sq.", "df", "p-val", ""),
caption = "Test ilorazu wiarygodności: Poisson vs NB"
)
| Typ śmieci | logLik NB | logLik Poisson | Chi-sq. | df | p-val | |
|---|---|---|---|---|---|---|
| Cigarette buds | -222.086 | -639.646 | 835.120 | 1 | <1e-16 | NB |
| Plastic bags | -130.796 | -152.947 | 44.302 | 1 | 2.81e-11 | NB |
| Bottle caps | -119.435 | -160.862 | 82.855 | 1 | <1e-16 | NB |
| Beverage cont. | -64.871 | -66.099 | 2.454 | 1 | 0.117 | Poisson |
| SUP | -96.892 | -102.788 | 11.793 | 1 | 0.000595 | NB |
| Plastic ropes | -148.346 | -241.218 | 185.745 | 1 | <1e-16 | NB |
| Styrofoam | -136.706 | -220.566 | 167.721 | 1 | <1e-16 | NB |
Dla wszystkich zmiennych zależnych z wyjątkiem Beverage containers wygrywa model ujemny dwumianowy. Beverage containers jest dość specyficzny — aż 39% obserwacji zerowych.
library(vcdExtra)
zt_tab = data.frame()
for (i in 1:7) {
invisible(capture.output(zt <- zero.test(litter_data[[i]]$y)))
zt_tab = rbind(zt_tab, data.frame(
litter = nazwy_litter[i],
chi2 = round(zt$statistic, 2),
df = zt$df,
p_value = zt$prob
))
}
knitr::kable(
zt_tab,
col.names = c("Typ śmieci", "Chi-sq.", "df", "p-value"),
caption = "Test van den Broeka: zero inflation względem rozkładu Poissona"
)
| Typ śmieci | Chi-sq. | df | p-value |
|---|---|---|---|
| Cigarette buds | 0.00 | 1 | 1.0000000 |
| Plastic bags | 5651.37 | 1 | 0.0000000 |
| Bottle caps | 2379.51 | 1 | 0.0000000 |
| Beverage cont. | 11.88 | 1 | 0.0005665 |
| SUP | 20.04 | 1 | 0.0000076 |
| Plastic ropes | 0.00 | 1 | 0.9933658 |
| Styrofoam | 2241.82 | 1 | 0.0000000 |
Dla wszystkich typów śmieci z wyjątkiem Cigarette buds i plastic ropes zero-inflated jest wskazany ale - ogromne wartości chi-sq dla plastic bags i bottle caps, a mała dla beverage cont (która ma największy udział zer) czemu? wywindowane chi-sq dotyczy typów śmieci charakteryzowanych przez wysoką średnią, to jest raczej efekt naddyspersji wykazanej przez lrtest
zi_test = data.frame()
for (i in 1:7) {
dane_test = litter_data[[i]]
pois = glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14,
family=poisson, data=dane_test)
zera_obs = sum(dane_test$y == 0) # ile zer jest
zera_exp = sum(dpois(0, fitted(pois))) # zera oczekiwane
zi_test = rbind(zi_test, data.frame(
litter = nazwy_litter[i],
zera_obs = round(zera_obs,3),
proc_zer = round(100 * zera_obs / nrow(dane_test),3),
zera_exp = round(zera_exp, 3),
diff = round(zera_obs - zera_exp,3)
))
}
knitr::kable(
zi_test,
col.names = c("Typ śmieci", "Zera obserwowalne", "Udział zer", "Zera oczekiwane", "Różnica"),
caption = ""
)
| Typ śmieci | Zera obserwowalne | Udział zer | Zera oczekiwane | Różnica |
|---|---|---|---|---|
| Cigarette buds | 0 | 0.000 | 0.000 | 0.000 |
| Plastic bags | 3 | 6.818 | 0.939 | 2.061 |
| Bottle caps | 6 | 13.636 | 3.164 | 2.836 |
| Beverage cont. | 17 | 38.636 | 14.413 | 2.587 |
| SUP | 5 | 11.364 | 3.939 | 1.061 |
| Plastic ropes | 0 | 0.000 | 0.056 | -0.056 |
| Styrofoam | 1 | 2.273 | 0.420 | 0.580 |
Generalnie żaden z typów śmieci nie wskazuje na bezwzględną konieczność uwzględnienia modeli zero-inflated, ale dla Plastic bags, Beverage containers, Bottle caps i Beverage containers difference przyjmuje największe wartości - dla tych trzech wykonuję modele ZI jako robustness.
nazwy_litter_2 = c("Plastic bags", "Bottle caps","Beverage cont.")
zip2 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d2, dist="poisson")
zip3 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d3, dist="poisson")
zip4 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d4, dist="poisson")
stargazer(zip2, zip3, zip4,
type="html", column.labels=nazwy_litter_2, model.numbers=FALSE,
covariate.labels=etykiety)
| Dependent variable: | |||
| y | |||
| Plastic bags | Bottle caps | Beverage cont. | |
| BathingZoneManag | -0.163 | -0.449 | 0.594 |
| BlueFlag | -1.353 | -1.476 | -0.623 |
| GarbageBin | 0.930 | 1.535 | 0.141 |
| BeachCleanup | -0.372 | 0.447 | -0.588 |
| BeachCafe | 1.289 | 0.970 | 1.613 |
| CoastCafe | -0.444 | -0.699 | 0.028 |
| Accommod | -0.417 | -0.542 | -0.084 |
| Parking | 0.539 | -0.128 | 0.916 |
| Shops | -0.190 | 0.373 | -0.612 |
| PublTransport | 0.326 | 0.552 | -0.390 |
| BeachUseCateg | 0.552 | -0.162 | -0.121 |
| ln(LocalResidents) | 0.033 | -0.222 | -0.176 |
| ln(TouristNights2024) | -0.111 | 0.085 | -0.615 |
| LVSeaBasin | -0.315 | -0.967 | 0.290 |
| Constant | 2.847 | 3.647 | 9.305 |
| Observations | 44 | 44 | 44 |
| Log Likelihood | -143.598 | -145.721 | -48.227 |
| Note: | p<0.1; p<0.05; p<0.01 | ||
zinb2 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d2, dist="negbin")
zinb3 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d3, dist="negbin")
zinb4 = zeroinfl(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14|
X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+lnX12+lnX13+X14, data=d4, dist="negbin")
stargazer(zinb2, zinb3, zinb4,
type="html", column.labels=nazwy_litter_2, model.numbers=FALSE,
covariate.labels=etykiety)
| Dependent variable: | |||
| y | |||
| Plastic bags | Bottle caps | Beverage cont. | |
| BathingZoneManag | 0.046 | -0.011 | 0.707 |
| BlueFlag | -1.115 | -1.511 | -0.898 |
| GarbageBin | 0.973 | 1.530 | -0.008 |
| BeachCleanup | -0.515 | -0.226 | -0.326 |
| BeachCafe | 1.129 | 0.683 | 1.841 |
| CoastCafe | -0.266 | -0.280 | -0.347 |
| Accommod | -0.453 | -0.347 | 0.003 |
| Parking | 0.593 | -0.050 | 1.092 |
| Shops | -0.049 | 0.310 | -0.704 |
| PublTransport | 0.287 | 0.288 | -0.039 |
| BeachUseCateg | 0.278 | 0.071 | -0.481 |
| ln(LocalResidents) | 0.088 | -0.016 | -0.078 |
| ln(TouristNights2024) | -0.126 | -0.135 | -0.726 |
| LVSeaBasin | -0.197 | -1.042 | 0.136 |
| Constant | 3.017 | 5.077 | 10.056 |
| Observations | 44 | 44 | 44 |
| Log Likelihood | -124.203 | -109.654 | -47.089 |
| Note: | p<0.1; p<0.05; p<0.01 | ||
Wyniki modeli ZI nie są nigdzie istotne.