import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Data source: https://open.canada.ca
raw_data = pd.read_csv(
    "https://health-infobase.canada.ca/src/data/covidLive/covid19-download.csv"
)
# Work on a copy so the downloaded frame itself is left untouched
df = raw_data.copy(deep=True)
# Preview the first 5 rows
df.head(n=5)
##    pruid            prname  ... avgdeaths_last7 avgratedeaths_last7
## 0     59  British Columbia  ...             0.0                 0.0
## 1     48           Alberta  ...             0.0                 0.0
## 2     47      Saskatchewan  ...             0.0                 0.0
## 3     46          Manitoba  ...             0.0                 0.0
## 4     35           Ontario  ...             0.0                 0.0
## 
## [5 rows x 23 columns]
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()
##              pruid  reporting_week  ...  avgdeaths_last7  avgratedeaths_last7
## count  1905.000000     1905.000000  ...      1905.000000          1778.000000
## mean     39.200000       24.803150  ...         6.289071             0.089927
## std      26.290483       14.668092  ...        18.005564             0.153713
## min       1.000000        1.000000  ...         0.000000             0.000000
## 25%      12.000000       12.000000  ...         0.000000             0.000000
## 50%      46.000000       23.000000  ...         0.140000             0.020000
## 75%      60.000000       37.000000  ...         3.430000             0.120000
## max      99.000000       53.000000  ...       160.430000             1.260000
## 
## [8 rows x 20 columns]
# Remove the pruid column in place, then recompute the summary without it
df.drop(columns=['pruid'], inplace=True)
df.describe()
##        reporting_week  reporting_year  ...  avgdeaths_last7  avgratedeaths_last7
## count     1905.000000     1905.000000  ...      1905.000000          1778.000000
## mean        24.803150     2020.818898  ...         6.289071             0.089927
## std         14.668092        0.747025  ...        18.005564             0.153713
## min          1.000000     2020.000000  ...         0.000000             0.000000
## 25%         12.000000     2020.000000  ...         0.000000             0.000000
## 50%         23.000000     2021.000000  ...         0.140000             0.020000
## 75%         37.000000     2021.000000  ...         3.430000             0.120000
## max         53.000000     2022.000000  ...       160.430000             1.260000
## 
## [8 rows x 19 columns]