Objetivo general
Comprobar el avance en el uso de las herramientas de análisis y visualización aprendidas.
Objetivos específicos
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.3
library(DT)
## Warning: package 'DT' was built under R version 4.1.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
# Load the steel-industry energy data set and inspect its structure.
#
# The file is addressed through a full path built with file.path(), so the
# session's working directory is no longer changed with setwd().
# NOTE(review): data_dir is machine-specific; adjust it to where the CSV lives.
data_dir <- "C:/Users/YesicaRodriguez/OneDrive - SEI/Documents/Coderhouse"

# The first column used to be imported as "ï..Date"; that prefix is the
# signature of a UTF-8 byte-order mark at the start of the file.
# fileEncoding = "UTF-8-BOM" strips the mark so the column is named "Date".
datasteel <- read.csv(
  file.path(data_dir, "Steel_industry_data.csv"),
  fileEncoding = "UTF-8-BOM"
)
str(datasteel)
## 'data.frame': 35040 obs. of 11 variables:
## $ Date : chr "01/01/2018 00:15" "01/01/2018 00:30" "01/01/2018 00:45" "01/01/2018 01:00" ...
## $ Usage_kWh : num 3.17 4 3.24 3.31 3.82 3.28 3.6 3.6 3.28 3.78 ...
## $ Lagging_Current_Reactive_Power_kVarh: num 2.95 4.46 3.28 3.56 4.5 3.56 4.14 4.28 3.64 4.72 ...
## $ Leading_Current_Reactive_Power_kVarh: num 0 0 0 0 0 0 0 0 0 0 ...
## $ CO2.tCO2. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Lagging_Current_Power_Factor : num 73.2 66.8 70.3 68.1 64.7 ...
## $ Leading_Current_Power_Factor : num 100 100 100 100 100 100 100 100 100 100 ...
## $ NSM : int 900 1800 2700 3600 4500 5400 6300 7200 8100 9000 ...
## $ Week_Status : chr "Weekday" "Weekday" "Weekday" "Weekday" ...
## $ Day_of_week : chr "Monday" "Monday" "Monday" "Monday" ...
## $ Load_Type : chr "Light_Load" "Light_Load" "Light_Load" "Light_Load" ...
names(datasteel)
## [1] "Date"
## [2] "Usage_kWh"
## [3] "Lagging_Current_Reactive_Power_kVarh"
## [4] "Leading_Current_Reactive_Power_kVarh"
## [5] "CO2.tCO2."
## [6] "Lagging_Current_Power_Factor"
## [7] "Leading_Current_Power_Factor"
## [8] "NSM"
## [9] "Week_Status"
## [10] "Day_of_week"
## [11] "Load_Type"
# Mean energy use (Usage_kWh) for every Day_of_week / Load_Type combination.
# Both grouping columns are converted to factors first; .groups = "drop"
# returns an ungrouped summary table.
resumen1 <- datasteel %>%
  mutate(across(c(Day_of_week, Load_Type), factor)) %>%
  group_by(Day_of_week, Load_Type) %>%
  summarise(media_KWh = mean(Usage_kWh), .groups = "drop")
# Show the summary as an interactive table: row names hidden and one filter
# box per column placed above the header.
DT::datatable(resumen1, rownames = FALSE, filter = "top")
# Histogram of Usage_kWh with bars filled by day of the week and outlined in
# gray. The bin count is left at the geom_histogram() default.
ggplot(
  data = datasteel,
  mapping = aes(x = Usage_kWh, fill = factor(Day_of_week))
) +
  geom_histogram(colour = "gray")
# Scatter plot of Usage_kWh (x) against Lagging_Current_Power_Factor (y),
# with points coloured by day of the week.
ggplot(
  datasteel,
  aes(
    x = Usage_kWh,
    y = Lagging_Current_Power_Factor,
    colour = Day_of_week
  )
) +
  geom_point()
# Missing-value check: print a confirmation message when the data frame has
# no NA cells, otherwise print how many NA cells it contains.
if (!any(is.na(datasteel))) {
  print("La base de datos no tiene datos NA")
} else {
  datos_vacios <- sum(is.na(datasteel))
  print(datos_vacios)
}
## [1] "La base de datos no tiene datos NA"
# Emptiness check: when the data frame has rows, print a label together with
# its dimensions (rows, columns); otherwise print an "empty" message.
# c() coerces the numeric dimensions to character, so a character vector of
# three elements is printed.
if (nrow(datasteel) > 0) {
  dimensiones_datasteel <- dim(datasteel)
  print(c("La base de datos tiene las siguientes dimensiones:", dimensiones_datasteel))
} else {
  print("La base de datos esta vacia")
}
## [1] "La base de datos tiene las siguientes dimensiones:"
## [2] "35040"
## [3] "11"