install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("summarytools")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("skimr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("janitor")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("naniar")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("rstatix")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("dlookr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(summarytools)
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
## system might not have X11 capabilities; in case of errors when using dfSummary(), set st_options(use.x11 = FALSE)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(dplyr)
library(skimr)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(naniar)
##
## Attaching package: 'naniar'
## The following object is masked from 'package:skimr':
##
## n_complete
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:janitor':
##
## make_clean_names
## The following object is masked from 'package:stats':
##
## filter
library(dlookr)
## Registered S3 methods overwritten by 'dlookr':
## method from
## plot.transform scales
## print.transform scales
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:base':
##
## transform
# Read the stem-count workbook (path is relative to the working directory)
# and preview the column names and types.
Estatice <- read_excel(path = "estatice mulching cant. varas r 24.xlsx")
glimpse(Estatice)
## Rows: 90
## Columns: 6
## $ Tratamiento <chr> "c/goteo", "c/goteo", "c/goteo", "c/goteo", "c/goteo", "c/…
## $ Bloqueo <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2…
## $ Color <chr> "rosado", "rosado", "rosado", "rosado", "rosado", "azul", …
## $ CVC <dbl> 1, 8, 2, NA, 2, 3, 4, 6, NA, 2, NA, 3, 4, NA, NA, 4, NA, 3…
## $ CVM <dbl> 30, 19, 28, 2, 8, 23, 28, NA, 6, 5, 30, 10, 14, 10, 12, 7,…
## $ CVL <dbl> NA, NA, NA, 2, NA, 1, 3, NA, 9, 10, 19, 1, 12, 8, 30, 2, 7…
# Per-column overview (missing counts, completeness, distribution summary).
# Left in piped form: the rendered summary names the input "Piped data",
# which appears to come from how skim() is called.
Estatice %>% skim()
| Name | Piped data |
| Number of rows | 90 |
| Number of columns | 6 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| Tratamiento | 0 | 1 | 7 | 9 | 0 | 2 | 0 |
| Color | 0 | 1 | 4 | 8 | 0 | 3 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Bloqueo | 0 | 1.00 | 2.00 | 0.82 | 1 | 1 | 2 | 3.00 | 3 | ▇▁▇▁▇ |
| CVC | 25 | 0.72 | 3.85 | 3.13 | 1 | 2 | 3 | 5.00 | 15 | ▇▂▂▁▁ |
| CVM | 6 | 0.93 | 12.18 | 7.60 | 1 | 7 | 11 | 15.25 | 33 | ▆▇▅▁▂ |
| CVL | 32 | 0.64 | 9.48 | 7.91 | 1 | 4 | 8 | 11.00 | 36 | ▇▃▁▁▁ |
# Replace the abbreviated stem-count column names (CVC, CVM, CVL) with
# descriptive ones (cortas / medianas / largas).
Estatice <- rename(
  Estatice,
  Cant_varas_cortas = CVC,
  Cant_varas_medianas = CVM,
  Cant_varas_largas = CVL
)
# List the rows that share a Tratamiento/Color combination, together with
# how many rows fall in each combination (dupe_count).
get_dupes(Estatice, Tratamiento, Color)
## # A tibble: 90 × 7
## Tratamiento Color dupe_count Bloqueo Cant_varas_cortas Cant_varas_medianas
## <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 c/goteo amarillo 15 1 NA 30
## 2 c/goteo amarillo 15 1 3 10
## 3 c/goteo amarillo 15 1 4 14
## 4 c/goteo amarillo 15 1 NA 10
## 5 c/goteo amarillo 15 1 NA 12
## 6 c/goteo amarillo 15 2 NA 9
## 7 c/goteo amarillo 15 2 NA 18
## 8 c/goteo amarillo 15 2 8 18
## 9 c/goteo amarillo 15 2 10 NA
## 10 c/goteo amarillo 15 2 2 14
## # ℹ 80 more rows
## # ℹ 1 more variable: Cant_varas_largas <dbl>
# Total number of missing cells in the whole table.
n_miss(Estatice)
## [1] 63
# Visual map of where the missing values are located.
vis_miss(Estatice)
### Imputación de datos faltantes por la mediana
# Fill the missing values of each stem-count column with that column's own
# median (computed on the non-missing values).
Estatice <- mutate(
  Estatice,
  across(
    c(Cant_varas_cortas, Cant_varas_medianas, Cant_varas_largas),
    \(x) replace_na(x, median(x, na.rm = TRUE))
  )
)
# Descriptive statistics of the short-stem counts after the NA imputation.
descr(Estatice$Cant_varas_cortas)
## Descriptive Statistics
## Estatice$Cant_varas_cortas
## N: 90
##
## Cant_varas_cortas
## ----------------- -------------------
## Mean 3.61
## Std.Dev 2.68
## Min 1.00
## Q1 2.00
## Median 3.00
## Q3 4.00
## Max 15.00
## MAD 1.48
## IQR 2.00
## CV 0.74
## Skewness 1.99
## SE.Skewness 0.25
## Kurtosis 4.24
## N.Valid 90.00
## N 90.00
## Pct.Valid 100.00
# Descriptive statistics of the medium-stem counts after the NA imputation.
descr(Estatice$Cant_varas_medianas)
## Descriptive Statistics
## Estatice$Cant_varas_medianas
## N: 90
##
## Cant_varas_medianas
## ----------------- ---------------------
## Mean 12.10
## Std.Dev 7.34
## Min 1.00
## Q1 7.00
## Median 11.00
## Q3 14.00
## Max 33.00
## MAD 5.93
## IQR 7.00
## CV 0.61
## Skewness 1.06
## SE.Skewness 0.25
## Kurtosis 0.82
## N.Valid 90.00
## N 90.00
## Pct.Valid 100.00
# Descriptive statistics of the long-stem counts after the NA imputation.
descr(Estatice$Cant_varas_largas)
## Descriptive Statistics
## Estatice$Cant_varas_largas
## N: 90
##
## Cant_varas_largas
## ----------------- -------------------
## Mean 8.96
## Std.Dev 6.37
## Min 1.00
## Q1 6.00
## Median 8.00
## Q3 9.00
## Max 36.00
## MAD 1.48
## IQR 2.75
## CV 0.71
## Skewness 2.12
## SE.Skewness 0.25
## Kurtosis 5.23
## N.Valid 90.00
## N 90.00
## Pct.Valid 100.00
# Boxplot of the short-stem counts for each irrigation treatment.
ggplot(
  data = Estatice,
  mapping = aes(x = factor(Tratamiento), y = Cant_varas_cortas)
) +
  geom_boxplot(fill = "lightblue", color = "black") +
  labs(
    title = "Influencia del tipo de riego en la cantidad de varas cortas",
    x = "Tratamiento",
    y = "Cantidad de varas cortas"
  ) +
  theme_minimal()

# Rows whose short-stem count is flagged as an outlier
# (the result carries is.outlier / is.extreme columns).
identify_outliers(Estatice, Cant_varas_cortas)
## # A tibble: 10 × 8
## Tratamiento Bloqueo Color Cant_varas_cortas Cant_varas_medianas
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 c/goteo 1 rosado 8 19
## 2 c/goteo 2 rosado 9 19
## 3 c/goteo 2 amarillo 8 18
## 4 c/goteo 2 amarillo 10 11
## 5 c/goteo 3 azul 9 7
## 6 por surco 1 azul 8 13
## 7 por surco 2 amarillo 15 13
## 8 por surco 3 azul 11 1
## 9 por surco 3 amarillo 13 20
## 10 por surco 3 amarillo 9 9
## # ℹ 3 more variables: Cant_varas_largas <dbl>, is.outlier <lgl>,
## # is.extreme <lgl>
# Outlier rate of every numeric column (rates instead of row indices).
find_outliers(Estatice, index = FALSE, rate = TRUE)
## Bloqueo Cant_varas_cortas Cant_varas_medianas Cant_varas_largas
## 0.000 11.111 8.889 15.556
# Outlier diagnosis for the short-stem counts: count, ratio, and the column
# mean with and without the flagged values.
diagnose_outlier(Estatice, Cant_varas_cortas)
## # A tibble: 1 × 6
## variables outliers_cnt outliers_ratio outliers_mean with_mean without_mean
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Cant_varas_c… 10 11.1 10 3.61 2.81
# Replace the flagged short-stem outliers using dlookr's "mean" method.
# no_attrs = TRUE asks for a plain vector so it can be stored as a column.
Estatice <- mutate(
  Estatice,
  Cant_varas_cortas = imputate_outlier(
    Estatice,
    Cant_varas_cortas,
    method = "mean",
    no_attrs = TRUE
  )
)

# Redraw the short-stem boxplot to inspect the effect of the imputation.
ggplot(
  data = Estatice,
  mapping = aes(x = factor(Tratamiento), y = Cant_varas_cortas)
) +
  geom_boxplot(fill = "lightblue", color = "black") +
  labs(
    title = "Influencia del tipo de riego en la cantidad de varas cortas",
    x = "Tratamiento",
    y = "Cantidad de varas cortas"
  ) +
  theme_minimal()
# Boxplot of the medium-stem counts for each irrigation treatment.
ggplot(
  data = Estatice,
  mapping = aes(x = factor(Tratamiento), y = Cant_varas_medianas)
) +
  geom_boxplot(fill = "orange", color = "black") +
  labs(
    title = "Influencia del tipo de riego en la cantidad de varas medianas",
    x = "Tratamiento",
    y = "Cantidad de varas medianas"
  ) +
  theme_minimal()

# Rows whose medium-stem count is flagged as an outlier.
identify_outliers(Estatice, Cant_varas_medianas)
## # A tibble: 8 × 8
## Tratamiento Bloqueo Color Cant_varas_cortas Cant_varas_medianas
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 c/goteo 1 rosado 1 30
## 2 c/goteo 1 rosado 2 28
## 3 c/goteo 1 azul 4 28
## 4 c/goteo 1 amarillo 3 30
## 5 por surco 1 rosado 7 30
## 6 por surco 2 rosado 2 28
## 7 por surco 2 amarillo 3 33
## 8 por surco 2 amarillo 3 32
## # ℹ 3 more variables: Cant_varas_largas <dbl>, is.outlier <lgl>,
## # is.extreme <lgl>
# Replace the flagged medium-stem outliers using dlookr's "mean" method.
# no_attrs = TRUE asks for a plain vector so it can be stored as a column.
Estatice <- mutate(
  Estatice,
  Cant_varas_medianas = imputate_outlier(
    Estatice,
    Cant_varas_medianas,
    method = "mean",
    no_attrs = TRUE
  )
)

# Redraw the medium-stem boxplot to inspect the effect of the imputation.
ggplot(
  data = Estatice,
  mapping = aes(x = factor(Tratamiento), y = Cant_varas_medianas)
) +
  geom_boxplot(fill = "orange", color = "black") +
  labs(
    title = "Influencia del tipo de riego en la cantidad de varas medianas",
    x = "Tratamiento",
    y = "Cantidad de varas medianas"
  ) +
  theme_minimal()
# Boxplot of the long-stem counts for each irrigation treatment.
# The y aesthetic must be Cant_varas_largas so the data matches the title and
# axis label (it previously plotted the short-stem column by mistake).
ggplot(
  data = Estatice,
  mapping = aes(x = factor(Tratamiento), y = Cant_varas_largas)
) +
  geom_boxplot(fill = "lightgreen", color = "black") +
  theme_minimal() +
  labs(
    title = "Influencia del tipo de riego en la cantidad de varas largas",
    x = "Tratamiento",
    y = "Cantidad de varas largas"
  )
# Rows whose long-stem count is flagged as an outlier.
identify_outliers(Estatice, Cant_varas_largas)
## # A tibble: 19 × 8
## Tratamiento Bloqueo Color Cant_varas_cortas Cant_varas_medianas
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 c/goteo 1 rosado 3 2
## 2 c/goteo 1 azul 3 23
## 3 c/goteo 1 amarillo 3 12.1
## 4 c/goteo 1 amarillo 3 10
## 5 c/goteo 1 amarillo 3 12
## 6 c/goteo 2 rosado 4 7
## 7 c/goteo 2 rosado 3 10
## 8 c/goteo 2 azul 3 12
## 9 c/goteo 2 azul 2 11
## 10 c/goteo 2 amarillo 3 18
## 11 c/goteo 2 amarillo 2 14
## 12 c/goteo 3 rosado 1 2
## 13 c/goteo 3 azul 1 8
## 14 por surco 1 amarillo 2 14
## 15 por surco 2 rosado 2 12.1
## 16 por surco 2 rosado 3 11
## 17 por surco 2 amarillo 1 13
## 18 por surco 3 amarillo 3 4
## 19 por surco 3 amarillo 2 7
## # ℹ 3 more variables: Cant_varas_largas <dbl>, is.outlier <lgl>,
## # is.extreme <lgl>
# Replace the flagged long-stem outliers using dlookr's "mean" method.
# no_attrs = TRUE asks for a plain vector so it can be stored as a column.
Estatice <- mutate(
  Estatice,
  Cant_varas_largas = imputate_outlier(
    Estatice,
    Cant_varas_largas,
    method = "mean",
    no_attrs = TRUE
  )
)
# Boxplot of the long-stem counts per irrigation treatment after the outlier
# imputation. The y aesthetic must be Cant_varas_largas so the data matches
# the title and axis label (it previously plotted the short-stem column).
ggplot(
  data = Estatice,
  mapping = aes(x = factor(Tratamiento), y = Cant_varas_largas)
) +
  geom_boxplot(fill = "lightgreen", color = "black") +
  theme_minimal() +
  labs(
    title = "Influencia del tipo de riego en la cantidad de varas largas",
    x = "Tratamiento",
    y = "Cantidad de varas largas"
  )