# Read the nurses CSV file into a data frame.
# NOTE(review): the path is an absolute, machine-specific Windows path, so the
# script only runs unchanged on a machine that has this exact directory.
data <- read.csv("C:\\Users\\91814\\Desktop\\Statistics\\nurses.csv")
library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Auto-print the loaded data frame for inspection
data

Extracting the pageviews data for the Registered Nurses page from the internet and reading it into a dataframe.

# Read the pageviews CSV file into a data frame.
# NOTE(review): the path is an absolute, machine-specific Windows path, so the
# script only runs unchanged on a machine that has this exact directory.
newdata <- read.csv("C:\\Users\\91814\\Desktop\\Statistics\\pageviews.csv")

Printing the new dataframe

# Auto-print the pageviews data frame for inspection
newdata
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.3
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

Reading distinct values from the new data

# Drop rows that are exact duplicates across all columns
data1 <- newdata %>%
  distinct()

Checking for duplicates if any

# Flag every row whose Date value already occurred in an earlier row,
# then print those rows (an empty result means each Date appears once)
duplicated_dates <- duplicated(data1[["Date"]])
print(data1[duplicated_dates, ])
## [1] Date             Registered.nurse
## <0 rows> (or 0-length row.names)

Summarizing the data by date

# Collapse to one row per Date by summing the Registered.nurse values
data2 <- summarise(
  group_by(data1, Date),
  Registered.nurse = sum(Registered.nurse)
)

Printing the summarized data

# Auto-print the per-date summary for inspection
data2

Plotting the data

# Full-series plot ----
library(ggplot2)

# Parse the Date column into Date objects so the x axis is a date scale
data2[["Date"]] <- as.Date(data2[["Date"]])

# Line chart of the whole series with a marker on every observation
ggplot(data = data2, mapping = aes(x = Date, y = Registered.nurse)) +
  geom_line() +
  geom_point() +
  xlab("Date") +
  ylab("Number of Registered Nurses") +
  ggtitle("Registered Nurses Over Time")

Plotting a subset graph


Since the graph above shows no consistent trend, we take a subset of the data to analyse the trends.

# Subset plot ----
library(dplyr)

# Ensure Date holds Date objects; as.Date() leaves an existing Date unchanged
data2[["Date"]] <- as.Date(data2[["Date"]])

# Keep the window 2023-08-01 through 2024-01-31, both ends inclusive
filtered_data <- data2 %>%
  filter(
    Date >= as.Date("2023-08-01"),
    Date <= as.Date("2024-01-31")
  )

# Line chart of the subset with a marker on every observation
ggplot(data = filtered_data, mapping = aes(x = Date, y = Registered.nurse)) +
  geom_line() +
  geom_point() +
  xlab("Date") +
  ylab("Number of Registered Nurses") +
  ggtitle("Registered Nurses from August 2023 to January 2024")

Initial High Levels: The high number of registered nurses in the first week of August (above 800) might be influenced by specific factors such as seasonal hiring, temporary staffing increases due to anticipated demand, or other operational reasons specific to this period.

Subsequent Increase: The further increase in the second week could suggest additional temporary factors or delayed staffing responses to anticipated needs.

Steady Decline: The significant and steady decline from mid-August to below 400 by the end of the month, followed by a continued decrease through to January, is particularly noteworthy. This decline could be due to several factors:

# Plot the August 2023 - January 2024 subset with a fitted straight line
# (geom_smooth with method = "lm", confidence band turned off) on top of the
# observed series, to make the overall direction of the subset visible.
# The title names the subset and the trend line so this chart is not confused
# with the full-range "Registered Nurses Over Time" plot.
ggplot(filtered_data, aes(x = Date, y = Registered.nurse)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_line() +
  geom_point() +
  labs(
    x = "Date",
    y = "Number of Registered Nurses",
    title = "Registered Nurses from August 2023 to January 2024 (Linear Trend)"
  )
## `geom_smooth()` using formula = 'y ~ x'

As suspected, the graph above shows a declining trend between August 2023 and January 2024, although there was a sharp increase followed by a decline in mid-August 2023.

library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Load required libraries
library(ggplot2)


# Ensure Date holds Date objects before using it as a numeric predictor
data2[["Date"]] <- as.Date(data2[["Date"]])

# Fit a LOESS (local regression) curve of the counts against the dates,
# with the dates converted to numbers inside the formula
smoothed_data <- loess(Registered.nurse ~ as.numeric(Date), data = data2)

# Fitted curve evaluated at the observed dates
smoothed_values <- predict(smoothed_data)

# Pair each date with its observed and smoothed value for plotting
smoothed_df <- data.frame(
  Date = data2[["Date"]],
  Original = data2[["Registered.nurse"]],
  Smoothed = smoothed_values
)

# Observed series in translucent blue, LOESS curve in red
ggplot(data = smoothed_df, mapping = aes(x = Date)) +
  geom_line(aes(y = Original), color = "blue", alpha = 0.5) +
  geom_line(aes(y = Smoothed), color = "red") +
  xlab("Date") +
  ylab("Number of Registered Nurses") +
  ggtitle("Original vs Smoothed Data")

  1. Original Data (Blue Line): This line represents the actual recorded values for the number of registered nurses. The data appears to fluctuate significantly over time, with some notable peaks and troughs.

  2. Smoothed Data (Red Line): This line shows a smoothed version of the original data, calculated with LOESS (locally estimated scatterplot smoothing) via R's `loess()` function. The purpose of this line is to help visualize the underlying trends by reducing the impact of short-term fluctuations and noise.

# Autocorrelation diagnostics ----
library(forecast)

# Wrap the counts in a ts object with a seasonal period of 7 observations.
# NOTE(review): a period of 7 corresponds to a weekly cycle only if there is
# one observation per day - confirm the sampling interval of data2.
ts_data <- ts(data = data2[["Registered.nurse"]], frequency = 7)

# Autocorrelation at successive lags
Acf(ts_data, main = "Autocorrelation Function (ACF)")

# Partial autocorrelation at successive lags
Pacf(ts_data, main = "Partial Autocorrelation Function (PACF)")

Interpretation of the ACF: