This RMarkdown document analyzes a weather dataset to gain insights into various aspects of weather data.
We will start by loading the weather dataset.
# Read the weather CSV into a data frame and preview its first ten rows.
# NOTE(review): the path below is absolute and machine-specific; the document
# only runs where this exact file exists.
weather_data <- read.csv(
  file = "C:\\Users\\singh\\Documents\\StatsR\\dataset\\Final\\weather_repo.csv"
)
head(weather_data, 10)
## country location_name latitude longitude
## 1 Afghanistan Kabul 34.52 69.18
## 2 Albania Tirana 41.33 19.82
## 3 Algeria Algiers 36.76 3.05
## 4 Andorra Andorra La Vella 42.50 1.52
## 5 Angola Luanda -8.84 13.23
## 6 Antigua and Barbuda Saint John's 17.12 -61.85
## 7 Argentina Buenos Aires -34.59 -58.67
## 8 Armenia Yerevan 40.18 44.51
## 9 Australia Canberra -35.28 149.22
## 10 Austria Vienna 48.20 16.37
## timezone last_updated_epoch last_updated
## 1 Asia/Kabul 1693301400 2023-08-29 14:00
## 2 Europe/Tirane 1693301400 2023-08-29 11:30
## 3 Africa/Algiers 1693301400 2023-08-29 10:30
## 4 Europe/Andorra 1693301400 2023-08-29 11:30
## 5 Africa/Luanda 1693301400 2023-08-29 10:30
## 6 America/Antigua 1693301400 2023-08-29 05:30
## 7 America/Argentina/Buenos_Aires 1693301400 2023-08-29 06:30
## 8 Asia/Yerevan 1693301400 2023-08-29 13:30
## 9 Australia/Sydney 1693301400 2023-08-29 19:30
## 10 Europe/Vienna 1693301400 2023-08-29 11:30
## temperature_celsius temperature_fahrenheit condition_text wind_mph wind_kph
## 1 28.8 83.8 Sunny 7.2 11.5
## 2 27.0 80.6 Partly cloudy 3.8 6.1
## 3 28.0 82.4 Partly cloudy 8.1 13.0
## 4 10.2 50.4 Sunny 6.0 9.7
## 5 25.0 77.0 Partly cloudy 2.2 3.6
## 6 29.0 84.2 Light rain 9.4 15.1
## 7 9.0 48.2 Clear 6.9 11.2
## 8 31.0 87.8 Partly cloudy 5.6 9.0
## 9 13.0 55.4 Clear 9.4 15.1
## 10 16.0 60.8 Light rain 11.9 19.1
## wind_degree wind_direction pressure_mb pressure_in precip_mm precip_in
## 1 74 ENE 1004 29.64 0.0 0.00
## 2 210 SSW 1006 29.71 0.0 0.00
## 3 240 WSW 1014 29.94 0.0 0.00
## 4 345 NNW 1015 29.97 0.0 0.00
## 5 270 W 1016 30.00 0.0 0.00
## 6 90 E 1015 29.97 0.3 0.01
## 7 70 ENE 1023 30.21 0.0 0.00
## 8 170 S 1003 29.62 0.0 0.00
## 9 10 N 1017 30.03 0.0 0.00
## 10 320 NW 1005 29.68 0.0 0.00
## humidity cloud feels_like_celsius feels_like_fahrenheit visibility_km
## 1 19 0 26.7 80.1 10
## 2 54 75 28.0 82.3 10
## 3 30 25 27.4 81.3 10
## 4 51 6 8.9 48.0 10
## 5 69 75 26.9 80.4 10
## 6 79 75 34.0 93.2 10
## 7 71 0 8.0 46.4 10
## 8 26 25 29.0 84.2 10
## 9 62 0 12.7 54.8 10
## 10 82 75 16.0 60.8 10
## visibility_miles uv_index gust_mph gust_kph air_quality_Carbon_Monoxide
## 1 6 7 8.3 13.3 647.5
## 2 6 6 7.4 11.9 433.9
## 3 6 7 3.4 5.4 647.5
## 4 6 4 7.4 11.9 190.3
## 5 6 6 3.6 5.8 2136.2
## 6 6 1 23.3 37.4 200.3
## 7 6 1 8.5 13.7 270.4
## 8 6 8 5.1 8.3 212.0
## 9 6 1 9.4 15.1 203.6
## 10 6 4 16.1 25.9 320.4
## air_quality_Ozone air_quality_Nitrogen_dioxide air_quality_Sulphur_dioxide
## 1 130.2 1.2 0.4
## 2 104.4 3.6 1.8
## 3 16.6 63.1 12.6
## 4 68.0 0.2 0.2
## 5 147.3 52.8 26.9
## 6 16.6 0.5 0.5
## 7 18.8 10.7 1.3
## 8 121.6 1.0 1.1
## 9 44.0 3.5 0.5
## 10 30.0 29.1 13.0
## air_quality_PM2.5 air_quality_PM10 air_quality_us.epa.index
## 1 7.9 11.1 1
## 2 28.2 29.6 2
## 3 6.4 7.9 1
## 4 0.5 0.8 1
## 5 139.6 203.3 4
## 6 0.8 1.9 1
## 7 2.1 3.5 1
## 8 5.0 6.2 1
## 9 4.0 5.8 1
## 10 13.1 14.9 1
## air_quality_gb.defra.index sunrise sunset moonrise moonset
## 1 1 05:24 AM 06:24 PM 05:39 PM 02:48 AM
## 2 3 06:04 AM 07:19 PM 06:50 PM 03:25 AM
## 3 1 06:16 AM 07:21 PM 06:46 PM 03:50 AM
## 4 1 07:16 AM 08:34 PM 08:08 PM 04:38 AM
## 5 10 06:11 AM 06:06 PM 04:43 PM 04:41 AM
## 6 1 05:53 AM 06:23 PM 05:36 PM 04:09 AM
## 7 1 07:18 AM 06:34 PM 04:57 PM 06:32 AM
## 8 1 06:26 AM 07:39 PM 07:05 PM 03:45 AM
## 9 1 06:27 AM 05:42 PM 03:19 PM 05:17 AM
## 10 2 06:07 AM 07:43 PM 07:27 PM 03:11 AM
## moon_phase moon_illumination
## 1 Waxing Gibbous 93
## 2 Waxing Gibbous 93
## 3 Waxing Gibbous 93
## 4 Waxing Gibbous 93
## 5 Waxing Gibbous 93
## 6 Waxing Gibbous 93
## 7 Waxing Gibbous 93
## 8 Waxing Gibbous 93
## 9 Waxing Gibbous 93
## 10 Waxing Gibbous 93
Unique values and their counts
location_name
# Tabulate how many rows each location contributes, then show the first 25
# entries of that table.
location_counts <- table(weather_data[["location_name"]])
head(location_counts, 25)
##
## 'S-Gravenwezel Abu Dhabi Abuja Accra
## 13 13 13 13
## Addis Ababa Adkip Algiers Amman
## 13 13 13 13
## Amsterdam Andorra La Vella Ankara Antananarivo
## 13 13 13 13
## Apia Ashgabat Asmara Astana
## 13 13 13 13
## Athens Bafoussam Baghdad Baku
## 13 13 13 13
## Bamako Bandar Seri Begawan Bangkok Bangui
## 13 13 13 13
## Banjul
## 13
wind_direction
# Count of observations per wind-direction label.
wind_dir <- table(weather_data[["wind_direction"]])
print(wind_dir)
##
## E ENE ESE N NE NNE NNW NW S SE SSE SSW SW W WNW WSW
## 243 165 171 288 158 114 98 117 183 131 126 165 171 147 99 158
moon_phase
# Count of observations per moon-phase label.
moon_pha <- table(weather_data[["moon_phase"]])
print(moon_pha)
##
## Full Moon Last Quarter Waning Crescent Waning Gibbous Waxing Gibbous
## 390 195 585 1169 195
condition_text
# Count of observations per weather-condition description.
weather_condition <- table(weather_data[["condition_text"]])
print(weather_condition)
##
## Clear Cloudy
## 878 11
## Fog Heavy rain
## 24 4
## Heavy rain at times Light drizzle
## 2 3
## Light rain Light rain shower
## 65 58
## Mist Moderate or heavy rain shower
## 61 9
## Moderate or heavy rain with thunder Moderate rain
## 32 16
## Moderate rain at times Overcast
## 3 81
## Partly cloudy Patchy light drizzle
## 1036 1
## Patchy light rain Patchy light rain with thunder
## 1 26
## Patchy rain possible Sunny
## 70 145
## Thundery outbreaks possible Torrential rain shower
## 5 3
Numeric columns, and their min/max, central tendency, and some notion of distribution (e.g., quantiles)
temperature_celsius
# Six-number summary (min, quartiles, mean, max) of temperature in Celsius.
temp_summary <- summary(weather_data$temperature_celsius)
print(temp_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.90 17.80 23.60 22.49 27.50 45.00
wind_mph
# Six-number summary (min, quartiles, mean, max) of wind speed in mph.
wind_summary <- summary(weather_data$wind_mph)
print(wind_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.200 3.100 5.600 6.559 8.900 43.800
pressure_in
# Six-number summary (min, quartiles, mean, max) of the pressure_in column.
pressure_summary <- summary(weather_data$pressure_in)
print(pressure_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.30 29.83 29.92 29.93 30.03 30.59
precip_in
# Six-number summary (min, quartiles, mean, max) of the precip_in column.
precip_summary <- summary(weather_data$precip_in)
print(precip_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.000000 0.000000 0.009432 0.000000 1.220000
humidity
# Six-number summary (min, quartiles, mean, max) of the humidity column.
humidity_summary <- summary(weather_data$humidity)
print(humidity_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.00 64.00 78.00 73.28 89.00 100.00
feels_like_celsius
# Six-number summary (min, quartiles, mean, max) of feels-like temperature
# in Celsius.
flc_summary <- summary(weather_data$feels_like_celsius)
print(flc_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.80 17.80 25.20 24.28 30.30 73.60
visibility_miles
# Six-number summary (min, quartiles, mean, max) of visibility in miles.
visibility_summary <- summary(weather_data$visibility_miles)
print(visibility_summary)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 6.00 6.00 5.81 6.00 19.00
# Sample variance of each selected weather and air-quality column,
# returned as a single-row summary.
weather_data %>%
  summarise(
    var_temp = var(temperature_celsius),
    var_humidity = var(humidity),
    var_wind = var(wind_mph),
    var_pressure = var(pressure_in),
    var_visibility = var(visibility_miles),
    var_gust = var(gust_mph),
    var_air_quality_Carbon_Monoxide = var(air_quality_Carbon_Monoxide),
    var_air_quality_Ozone = var(air_quality_Ozone),
    var_air_quality_Nitrogen_dioxide = var(air_quality_Nitrogen_dioxide),
    var_air_quality_Sulphur_dioxide = var(air_quality_Sulphur_dioxide)
  )
## var_temp var_humidity var_wind var_pressure var_visibility var_gust
## 1 41.9546 418.7166 20.81293 0.02999758 2.50521 47.69076
## var_air_quality_Carbon_Monoxide var_air_quality_Ozone
## 1 895511.8 1093.087
## var_air_quality_Nitrogen_dioxide var_air_quality_Sulphur_dioxide
## 1 359.7987 200.6711
# Sample standard deviation of each selected weather and air-quality column,
# returned as a single-row summary.
weather_data %>%
  summarise(
    sd_temp = sd(temperature_celsius),
    sd_humidity = sd(humidity),
    sd_wind = sd(wind_mph),
    sd_pressure = sd(pressure_in),
    sd_visibility = sd(visibility_miles),
    sd_gust = sd(gust_mph),
    sd_air_quality_Carbon_Monoxide = sd(air_quality_Carbon_Monoxide),
    sd_air_quality_Ozone = sd(air_quality_Ozone),
    sd_air_quality_Nitrogen_dioxide = sd(air_quality_Nitrogen_dioxide),
    # Must be sd(), not var(): this column reports a standard deviation like
    # every other sd_* column in this summary.
    sd_air_quality_Sulphur_dioxide = sd(air_quality_Sulphur_dioxide)
  )
## sd_temp sd_humidity sd_wind sd_pressure sd_visibility sd_gust
## 1 6.477237 20.46257 4.562119 0.1731981 1.582786 6.90585
## sd_air_quality_Carbon_Monoxide sd_air_quality_Ozone
## 1 946.3149 33.06186
## sd_air_quality_Nitrogen_dioxide sd_air_quality_Sulphur_dioxide
## 1 18.96836 14.16584
# Interquartile range of each selected weather and air-quality column,
# returned as a single-row summary.
weather_data %>%
  summarise(
    IQR_temp = IQR(temperature_celsius),
    IQR_humidity = IQR(humidity),
    IQR_wind = IQR(wind_mph),
    IQR_pressure = IQR(pressure_in),
    IQR_visibility = IQR(visibility_miles),
    IQR_gust = IQR(gust_mph),
    IQR_air_quality_Carbon_Monoxide = IQR(air_quality_Carbon_Monoxide),
    IQR_air_quality_Ozone = IQR(air_quality_Ozone),
    IQR_air_quality_Nitrogen_dioxide = IQR(air_quality_Nitrogen_dioxide),
    IQR_air_quality_Sulphur_dioxide = IQR(air_quality_Sulphur_dioxide)
  )
## IQR_temp IQR_humidity IQR_wind IQR_pressure IQR_visibility IQR_gust
## 1 9.7 25 5.8 0.2 0 8.5
## IQR_air_quality_Carbon_Monoxide IQR_air_quality_Ozone
## 1 213.6 37.7
## IQR_air_quality_Nitrogen_dioxide IQR_air_quality_Sulphur_dioxide
## 1 10.425 4.9
# Median absolute deviation (via mad()) of each selected weather and
# air-quality column, returned as a single-row summary.
weather_data %>%
  summarise(
    mad_temp = mad(temperature_celsius),
    mad_humidity = mad(humidity),
    mad_wind = mad(wind_mph),
    mad_pressure = mad(pressure_in),
    mad_visibility = mad(visibility_miles),
    mad_gust = mad(gust_mph),
    mad_air_quality_Carbon_Monoxide = mad(air_quality_Carbon_Monoxide),
    mad_air_quality_Ozone = mad(air_quality_Ozone),
    mad_air_quality_Nitrogen_dioxide = mad(air_quality_Nitrogen_dioxide),
    mad_air_quality_Sulphur_dioxide = mad(air_quality_Sulphur_dioxide)
  )
## mad_temp mad_humidity mad_wind mad_pressure mad_visibility mad_gust
## 1 6.81996 16.3086 4.00302 0.133434 0 6.22692
## mad_air_quality_Carbon_Monoxide mad_air_quality_Ozone
## 1 99.03768 27.87288
## mad_air_quality_Nitrogen_dioxide mad_air_quality_Sulphur_dioxide
## 1 4.74432 1.77912
Does weather condition (e.g., clear, rainy, snowy) correlate with air quality in different countries?
How do temperature and humidity affect the perceived temperature (feels-like temperature) in different locations?
Are there any patterns in moon phase that correlate with wind speed and direction?
Is there a correlation between visibility and the time of sunset?
How does latitude impact air quality and temperature variation?
# Temperature readings plotted against location name, one point per row.
# The x-axis labels are rotated 45 degrees and right-aligned.
ggplot(weather_data, aes(x = location_name, y = temperature_celsius)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Histogram of temperature using 2-degree-wide bins.
ggplot(weather_data, aes(x = temperature_celsius)) +
  geom_histogram(binwidth = 2, colour = "black", fill = "skyblue") +
  labs(
    title = "Distribution of Temperature (Celsius)",
    x = "Temperature (Celsius)",
    y = "Frequency"
  )
# Scatter plot of temperature against humidity, one point per row.
# No colour aesthetic is mapped in this plot, so no colour scale is attached
# (the previous scale_color_brewer() call had nothing to act on).
ggplot(weather_data, aes(x = temperature_celsius, y = humidity)) +
  geom_point() +
  labs(x = "Temperature (Celsius)", y = "Humidity (%)") +
  ggtitle("Scatter Plot of Temperature vs. Humidity")
# Scatter plot of temperature against wind speed, coloured by weather
# condition.
# NOTE(review): only "Clear", "Fog" and "Heavy rain" are assigned colours;
# how the remaining conditions are drawn depends on the installed ggplot2
# version - confirm this is the intended highlighting.
ggplot(weather_data, aes(x = temperature_celsius, y = wind_mph)) +
  geom_point(aes(colour = condition_text)) +
  scale_colour_manual(
    values = c("Clear" = "blue", "Fog" = "green", "Heavy rain" = "red")
  ) +
  labs(
    title = "Scatter Plot of Temperature vs. Wind Speed",
    x = "Temperature (Celsius)",
    y = "Wind Speed (mph)"
  )
# Correlation matrix of five numeric weather columns (cor() defaults).
correlation_matrix <- cor(
  weather_data[, c("temperature_celsius", "humidity", "wind_mph", "pressure_mb", "precip_mm")]
)

# Heatmap: reshape the matrix to long form; the plot below reads the
# Var1, Var2 and value columns from the result.
heatmap_data <- melt(correlation_matrix)

# Correlations are signed, so use a diverging scale centred on zero with the
# limits fixed at [-1, 1]; a plain two-colour gradient would rescale to the
# observed range and give zero no distinct colour.
ggplot(heatmap_data, aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limits = c(-1, 1)
  ) +
  labs(x = "Variable 1", y = "Variable 2", fill = "Correlation") +
  theme_minimal()