library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(readxl)
# Remember the directory the script was started from. `wd` is not referenced
# again in the visible part of the script.
wd <- getwd()

# Load the energy-consumption workbook. The relative path is resolved against
# the current working directory, so "ECD.xlsx" must sit next to the script.
ECD <- readxl::read_xlsx(path = "ECD.xlsx")
###2) THE VARIABLE BEING USED REPRESENTS THE AMOUNT OF ENERGY CONSUMED IN 2023 BY HOUSEHOLDS THAT APPLIED FOR UTILITY ASSISTANCE. ENERGY CONSUMPTION IS MEASURED IN kWh AND IS THE TOTAL AMOUNT OF ENERGY USED OVER 12 MONTHS, FROM JANUARY 2023 THROUGH DECEMBER 2023.
# Descriptive statistics for annual usage. `norm = TRUE` additionally reports
# skewness, kurtosis and the Shapiro-Wilk normality test (normtest.W / .p).
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
stat.desc(ECD$`2023 Total Usage`, norm = TRUE)
## nbr.val nbr.null nbr.na min max range
## 2.990000e+02 0.000000e+00 0.000000e+00 5.814000e+03 7.949500e+04 7.368100e+04
## sum median mean SE.mean CI.mean.0.95 var
## 6.281496e+06 1.876100e+04 2.100835e+04 6.111502e+02 1.202717e+03 1.116778e+08
## std.dev coef.var skewness skew.2SE kurtosis kurt.2SE
## 1.056777e+04 5.030274e-01 1.713898e+00 6.079641e+00 4.766930e+00 8.482339e+00
## normtest.W normtest.p
## 8.774358e-01 1.006532e-14
###3) REMOVING NAs: THE DATA DID NOT CONTAIN ANY NAs (nbr.na IS 0 IN THE OUTPUT ABOVE), SO THIS STEP DROPS NO ROWS.
# Keep only the rows that have a recorded 2023 usage value; all other columns
# are carried over unchanged.
ECD_clear <- dplyr::filter(ECD, !is.na(`2023 Total Usage`))
###4) HISTOGRAM OF THE VARIABLE
# Fine-grained frequency histogram of the raw annual usage. `breaks` is only a
# suggestion to hist(), so the actual number of bins may differ from 75.
hist(
  x = ECD_clear$`2023 Total Usage`,
  breaks = 75,
  main = "2023 Total Energy Consumption",
  xlab = "Energy Consumption"
)
###5) TRANSFORMING THE VARIABLE USING THE SQUARE ROOT. NOTE: TU = TOTAL USAGE
# Build a two-column table: the original usage and its square root (TU_SQRT).
# Narrowing to the usage column first and then adding the transformed column
# yields the same columns in the same order.
ECD_sqrt <- ECD_clear %>%
  select(`2023 Total Usage`) %>%
  mutate(TU_SQRT = sqrt(`2023 Total Usage`))

# Preview the first rows to sanity-check the transformation.
head(ECD_sqrt)
## # A tibble: 6 × 2
## `2023 Total Usage` TU_SQRT
## <dbl> <dbl>
## 1 8804 93.8
## 2 5892 76.8
## 3 5814 76.2
## 4 7730 87.9
## 5 8601 92.7
## 6 16841 130.
###6) HISTOGRAMS OF THE VARIABLE BEFORE AND AFTER THE SQUARE-ROOT TRANSFORMATION###
###BOTH HISTOGRAMS, BEFORE AND AFTER, HAVE A TAIL TO THE RIGHT, MEANING THE DATA ARE RIGHT-SKEWED. HOWEVER, THE SQUARE-ROOT HISTOGRAM SHOWS A DISTRIBUTION THAT IS SOMEWHAT MORE CENTERED THAN THE ORIGINAL HISTOGRAM.
# Before/after comparison of the distribution shape.
# `probability = TRUE` draws each histogram on the density scale so the kernel
# density curve added by lines() shares the same y-axis. `TRUE` is spelled out
# because the shorthand `T` is an ordinary variable that can be reassigned.
# The `x` expressions are left exactly as written: hist() builds its default
# title and x-axis label from the expression passed as `x`.

# Before: raw annual usage.
hist(ECD_clear$`2023 Total Usage`, breaks = 10, probability = TRUE)
lines(density(ECD_clear$`2023 Total Usage`), col = "red", lwd = 2)

# After: square-root-transformed usage.
hist(ECD_sqrt$TU_SQRT, breaks = 10, probability = TRUE)
lines(density(ECD_sqrt$TU_SQRT), col = "red", lwd = 2)