Using death records reported by municipalities during the COVID-19 pandemic, I show that the cases reported in the official media in El Salvador are underreported.
import pandas as pd
import numpy as np
import os
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.ticker as ticker
%matplotlib inline
from matplotlib import rcParams
from matplotlib.patches import Polygon
import matplotlib.lines as mlines
import matplotlib.dates as mdates
Reset css and font defaults in: /Users/eleno/.jupyter/custom & /Users/eleno/Library/Jupyter/nbextensions
# Prepare the death records and a 2015-2019 per-calendar-day baseline.
# NOTE(review): `data` is not created anywhere in the visible code; it must
# already hold the death records (with ANIO/MES/DIA columns) at this point.
data.rename(columns={'ANIO': 'year', 'MES': 'month', 'DIA': 'day'}, inplace=True)
data['date'] = pd.to_datetime(data[['year', 'month', 'day']], errors='coerce')

# Each row contributes a count of 1, so summing `freq` counts rows.
data['freq'] = 1
for part in ('day', 'month', 'year'):
    data[part] = pd.to_numeric(data[part], errors='coerce')

# Baseline: rows per (day, month) over 2015-2019, divided by the 5 years.
year_2015_2019 = (data['year'] >= 2015) & (data['year'] <= 2019)
mean = (
    data[year_2015_2019]
    .pivot_table(index=['day', 'month'], values='freq', aggfunc='sum')
    .reset_index()
)
mean['freq'] = mean['freq'] / 5
# 29 February occurs in only one year of 2015-2019 (2016), so undo the
# division by 5 for that calendar day.
is_feb_29 = (mean['day'] == 29) & (mean['month'] == 2)
mean.loc[is_feb_29, 'freq'] = mean.loc[is_feb_29, 'freq'] * 5

# Attach the baseline to every record; the clashing `freq` columns come back
# from the merge as freq_x (record count) and freq_y (baseline).
data = data.merge(mean, on=['day', 'month'])
data = data.rename(columns={'freq_x': 'freq', 'freq_y': 'mean'})

# One row per date: total rows that day and the baseline for that calendar day.
# String aggregator names avoid the pandas deprecation of NumPy callables here.
data_line = pd.pivot_table(
    data,
    index='date',
    values=['freq', 'mean'],
    aggfunc={'freq': 'sum', 'mean': 'mean'},
).reset_index()

# ISO date literals: same bounds as before (1 Jan - 10 Sep 2020), but unambiguous.
graph = data_line[(data_line['date'] >= '2020-01-01') & (data_line['date'] <= '2020-09-10')]
# Figure: the `freq` and `mean` columns of `graph` plotted against date,
# saved to muertes_eng.png.
plt.style.use('bmh')
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_prop_cycle(color=['#2b728f', '#cc2975', 'blue'])
ax.plot(graph['date'], graph[['freq', 'mean']])

# Abbreviated month names on the x axis.
myFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(myFmt)

# Thick red bottom/left spines; the top spine is drawn in white.
spine_colours = {'bottom': "#FF3636", 'left': "#FF3636", 'top': "white"}
for side, colour in spine_colours.items():
    ax.spines[side].set_linewidth(2)
    ax.spines[side].set_color(colour)

# Title, then the source line and the two-line note.
fig.text(x=0.35, y=1, s='Daily deaths reported',
         fontsize=15, alpha=.75, weight='bold')
footer_lines = [
    (0.05, 'Source: Registro Nacional de Personas Naturales'),
    (0.03, 'Note: Some deaths have not yet been recorded, especially for more recent dates'),
    (0.01, 'for this reason a fall is noticed in the last dates'),
]
for y_pos, text in footer_lines:
    fig.text(x=0.15, y=y_pos, s=text, fontsize=9, alpha=.75)
fig.patch.set_facecolor('white')

# Proxy artists for the legend, using the first two colours of the cycle.
blue = mlines.Line2D([], [], color='#2b728f',
                     markersize=15, label='Daily deaths in 2020')
pink = mlines.Line2D([], [], color='#cc2975',
                     markersize=15, label='5-year daily average')
fig.legend(handles=[pink, blue], loc=[0.37, 0.86])
plt.savefig('muertes_eng.png', bbox_inches='tight', dpi=600)
# Load monthly crime counts and build a daily wrongful-death series plus a
# per-calendar-day baseline from the years before 2020.
from pandas.tseries.offsets import MonthEnd

crimen = pd.read_csv('crimenes.csv')

# Spanish month abbreviations -> month numbers.
old = ['ENE', 'FEB', 'MAR', 'ABR', 'MAY', 'JUN', 'JUL', 'AGO', 'SEP', 'OCT', 'NOV', 'DIC']
new = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
crimen['Mes'] = crimen.Mes.replace(old, new)
crimen['day'] = 1
crimen.rename(columns={'Mes': 'month', 'Año': 'year'}, inplace=True)
# First day of the month rolled forward to the month end, so `date.dt.day`
# equals the number of days in that month.
crimen['date'] = pd.to_datetime(crimen[['year', 'month', 'day']], errors='coerce') + MonthEnd(1)

# Regroup: one row per month-end date with the monthly total.
acc = pd.pivot_table(crimen, index='date', values='HOMICIDIO CULPOSO/Total',
                     aggfunc='sum').reset_index()
# NOTE(review): every date here is a month end, so this mask removes every
# February row (not only 29 Feb); the interpolation below fills the gap.
# If only leap days were meant to be dropped the operator should be `|` —
# confirm intent before changing.
condition = (acc.date.dt.day != 29) & (acc.date.dt.month != 2)
# Monthly total divided by days in the month = average events per day.
acc['freq'] = acc['HOMICIDIO CULPOSO/Total'] / acc.date.dt.day
# Expand to one row per day and interpolate linearly between month ends.
acc = (
    acc[condition]
    .set_index('date')
    .resample('D')
    .asfreq()
    .interpolate(method='linear')
    .reset_index()
)
acc['month'] = acc.date.dt.month
acc['day'] = acc.date.dt.day

# Baseline: per (day, month) sum over all dates before 2020, divided by 5.
year_2015_2019 = (acc['date'] < '2020-01-01')
mean = (
    acc[year_2015_2019]
    .pivot_table(index=['day', 'month'], values='freq', aggfunc='sum')
    .reset_index()
)
mean['freq'] = mean['freq'] / 5
# Days 1-30 of January are rescaled by 5/4 — presumably because the daily
# series starts at the first month end, leaving those days with one
# observation fewer; TODO confirm against the data range.
early_january = (mean['day'] <= 30) & (mean['month'] == 1)
mean.loc[early_january, 'freq'] = mean.loc[early_january, 'freq'] * 5 / 4
# 29 February takes the 1 March baseline. Assign the scalar: assigning the
# selected Series would align on index labels and write NaN instead.
is_feb_29 = (mean['day'] == 29) & (mean['month'] == 2)
march_first = mean.loc[(mean['day'] == 1) & (mean['month'] == 3), 'freq']
if not march_first.empty:
    mean.loc[is_feb_29, 'freq'] = march_first.iloc[0]

# Attach the baseline; the clashing `freq` columns come back as freq_x/freq_y.
acc = acc.merge(mean, on=['day', 'month'])
acc = acc.rename(columns={'freq_x': 'freq', 'freq_y': 'mean'})
data_line = pd.pivot_table(
    acc,
    index='date',
    values=['freq', 'mean'],
    aggfunc={'freq': 'sum', 'mean': 'mean'},
).reset_index()
data_line.sort_values(by=['date'], inplace=True)
# The 29 Feb baseline is now filled for every year, so no separate patch of
# the 2020-02-29 row is needed.
# Smooth both series with a 7-row rolling mean.
data_line[['freq', 'mean']] = data_line[['freq', 'mean']].rolling(window=7).mean()
# ISO date literals: same bounds as before (1 Sep 2019 - 10 Sep 2020).
graph = data_line[(data_line['date'] >= '2019-09-01') & (data_line['date'] <= '2020-09-10')]
# Figure: the `freq` and `mean` columns of `graph` plotted against date,
# saved to hom_culposos_eng.png.
plt.style.use('bmh')
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_prop_cycle(color=['#2b728f', '#cc2975', 'blue'])
ax.plot(graph['date'], graph[['freq', 'mean']])

# Abbreviated month names on the x axis.
myFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(myFmt)

# Thick red bottom/left spines; the top spine is drawn in white.
spine_colours = {'bottom': "#FF3636", 'left': "#FF3636", 'top': "white"}
for side, colour in spine_colours.items():
    ax.spines[side].set_linewidth(2)
    ax.spines[side].set_color(colour)

# Title, subtitle and source line.
fig.text(x=0.35, y=1.05, s='Daily wrongful death',
         fontsize=15, alpha=.75, weight='bold')
fig.text(x=0.37, y=1.01, s='usually car accidents',
         fontsize=12, alpha=.75)
fig.text(x=0.15, y=0.05, s='Source: Policia Nacional Civil',
         fontsize=9, alpha=.75)
fig.patch.set_facecolor('white')

# Proxy artists for the legend, using the first two colours of the cycle.
blue = mlines.Line2D([], [], color='#2b728f',
                     markersize=15, label='Daily deaths')
pink = mlines.Line2D([], [], color='#cc2975',
                     markersize=15, label='5-year daily average')
fig.legend(handles=[pink, blue], loc=[0.37, 0.82])
plt.savefig('hom_culposos_eng.png', bbox_inches='tight', dpi=600)
# Regroup: one row per month-end date with the monthly homicide total, then
# build a daily series and a per-calendar-day baseline from dates before 2020.
acc = pd.pivot_table(crimen, index='date', values='HOMICIDIO/Total', aggfunc='sum').reset_index()
# Recompute the row mask here instead of relying on a `condition` variable
# left over from an earlier cell.
# NOTE(review): every date is a month end, so this mask removes every
# February row (not only 29 Feb); the interpolation below fills the gap.
# If only leap days were meant to be dropped the operator should be `|` —
# confirm intent before changing.
condition = (acc.date.dt.day != 29) & (acc.date.dt.month != 2)
# Monthly total divided by the day-of-month of the month-end date.
acc['freq'] = acc['HOMICIDIO/Total'] / acc.date.dt.day
# Expand to one row per day and interpolate linearly between month ends.
acc = (
    acc[condition]
    .set_index('date')
    .resample('D')
    .asfreq()
    .interpolate(method='linear')
    .reset_index()
)
acc['month'] = acc.date.dt.month
acc['day'] = acc.date.dt.day

# Baseline: per (day, month) sum over all dates before 2020, divided by 5.
year_2015_2019 = (acc['date'] < '2020-01-01')
mean = (
    acc[year_2015_2019]
    .pivot_table(index=['day', 'month'], values='freq', aggfunc='sum')
    .reset_index()
)
mean['freq'] = mean['freq'] / 5
# Days 1-30 of January are rescaled by 5/4 — presumably because the daily
# series starts at the first month end, leaving those days with one
# observation fewer; TODO confirm against the data range.
early_january = (mean['day'] <= 30) & (mean['month'] == 1)
mean.loc[early_january, 'freq'] = mean.loc[early_january, 'freq'] * 5 / 4
# 29 February takes the 1 March baseline. Assign the scalar: assigning the
# selected Series would align on index labels and write NaN instead, which
# leaves a hole in the rolling mean after every 29 February.
is_feb_29 = (mean['day'] == 29) & (mean['month'] == 2)
march_first = mean.loc[(mean['day'] == 1) & (mean['month'] == 3), 'freq']
if not march_first.empty:
    mean.loc[is_feb_29, 'freq'] = march_first.iloc[0]

# Attach the baseline; the clashing `freq` columns come back as freq_x/freq_y.
acc = acc.merge(mean, on=['day', 'month'])
acc = acc.rename(columns={'freq_x': 'freq', 'freq_y': 'mean'})
data_line = pd.pivot_table(
    acc,
    index='date',
    values=['freq', 'mean'],
    aggfunc={'freq': 'sum', 'mean': 'mean'},
).reset_index()
data_line.sort_values(by=['date'], inplace=True)
# The 29 Feb baseline is now filled for every year, so no separate patch of
# the 2020-02-29 row is needed.
# Smooth both series with a 7-row rolling mean.
data_line[['freq', 'mean']] = data_line[['freq', 'mean']].rolling(window=7).mean()
# ISO date literals: same bounds as before (1 Jan 2016 - 10 Sep 2020).
graph = data_line[(data_line['date'] >= '2016-01-01') & (data_line['date'] <= '2020-09-10')]
# Figure: the `freq` and `mean` columns of `graph` plotted against date,
# saved to hom_eng.png.
# NOTE(review): the labels below are Spanish although the output file is
# named `_eng` and the other figures use English text — confirm whether a
# translation was intended.
plt.style.use('bmh')
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_prop_cycle(color=['#2b728f', '#cc2975', 'blue'])
ax.plot(graph['date'], graph[['freq', 'mean']])

# Month abbreviation plus year on the x axis.
myFmt = mdates.DateFormatter('%b %Y')
ax.xaxis.set_major_formatter(myFmt)

# Thick red bottom/left spines; the top spine is drawn in white.
spine_colours = {'bottom': "#FF3636", 'left': "#FF3636", 'top': "white"}
for side, colour in spine_colours.items():
    ax.spines[side].set_linewidth(2)
    ax.spines[side].set_color(colour)

# Title and source line.
fig.text(x=0.41, y=1, s='Homicidios diarios',
         fontsize=15, alpha=.75, weight='bold')
fig.text(x=0.15, y=0.05, s='Fuente: Policia Nacional Civil',
         fontsize=10, alpha=.75)
fig.patch.set_facecolor('white')

# Proxy artists for the legend, using the first two colours of the cycle.
blue = mlines.Line2D([], [], color='#2b728f',
                     markersize=15, label='Homicidios diarios')
pink = mlines.Line2D([], [], color='#cc2975',
                     markersize=15, label='Promedio diario en 5 años')
fig.legend(handles=[pink, blue], loc=[0.37, 0.86])
plt.savefig('hom_eng.png', bbox_inches='tight', dpi=600)
# Section 2 - Loading and Selecting Data
# Download the three JHU CSSE global time series (wide format: one column per date).
cases = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
death = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')


def _to_country_long(raw, value_name):
    """Reshape a wide time-series table into long (Date, Country, value) form.

    Args:
        raw: DataFrame with the identifier columns 'Country/Region',
            'Province/State', 'Lat', 'Long' and one column per date.
        value_name: Name given to the value column of the result.

    Returns:
        DataFrame with columns 'Date' (datetime), 'Country' and `value_name`,
        one row per (date, country).
    """
    id_columns = ['Country/Region', 'Province/State', 'Lat', 'Long']
    date_columns = [col for col in raw.columns if col not in id_columns]
    # Sum the province/state rows of each country. The default aggregator
    # (mean) would average provinces instead of totalling them; countries
    # with a single row are unaffected.
    per_country = pd.pivot_table(raw, values=date_columns,
                                 index='Country/Region', aggfunc='sum')
    # unstack() yields a Series indexed by (date column, country).
    long_form = per_country.unstack().reset_index()
    long_form.columns = ['Date', 'Country', value_name]
    long_form['Date'] = pd.to_datetime(long_form['Date'])
    return long_form


# Reshape each table explicitly instead of binding names through exec().
cases = _to_country_long(cases, 'cases')
death = _to_country_long(death, 'death')
recovered = _to_country_long(recovered, 'recovered')

# Merge all the data and derive the active cases.
data2 = cases.merge(recovered, on=['Date', 'Country'], how='outer')
data2 = data2.merge(death, on=['Date', 'Country'], how='outer')
data2['active'] = data2['cases'] - data2['recovered'] - data2['death']

# Keep only the countries of interest.
countries = ['El Salvador']
data2 = data2[data2['Country'].isin(countries)]
# COVID: flag records whose reported cause mentions COVID and total them per date.
data['Official and Suspicious Deaths'] = data.CAUSADEF.str.contains('COVID') * 1
data_line = pd.pivot_table(data, index='date',
                           values='Official and Suspicious Deaths',
                           aggfunc='sum').reset_index()

# Merge the official series (the `death` column of data2) on date.
d_merge = data2[['Date', 'death']].rename(columns={'Date': 'date'})
data_line = data_line.merge(d_merge, on='date')

# Row-to-row difference of `death`: the first row stays NaN, exactly as the
# previous row-by-row loop left it, and the column now always exists.
data_line['oficial'] = data_line['death'].diff()

# ISO date literals: same bounds as before (1 Jan - 10 Sep 2020).
graph = data_line[(data_line['date'] >= '2020-01-01') & (data_line['date'] <= '2020-09-10')]
# Figure: the 'Official and Suspicious Deaths' and 'oficial' columns of
# `graph` plotted against date, saved to COVID_muertes_eng.png.
plt.style.use('bmh')
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_prop_cycle(color=['#2b728f', '#cc2975', 'blue'])
ax.plot(graph['date'], graph[['Official and Suspicious Deaths', 'oficial']])

# Month abbreviation plus year on the x axis.
myFmt = mdates.DateFormatter('%b %Y')
ax.xaxis.set_major_formatter(myFmt)

# Thick red bottom/left spines; the top spine is drawn in white.
spine_colours = {'bottom': "#FF3636", 'left': "#FF3636", 'top': "white"}
for side, colour in spine_colours.items():
    ax.spines[side].set_linewidth(2)
    ax.spines[side].set_color(colour)

# Title and source line.
fig.text(x=0.42, y=1, s='Deaths from COVID',
         fontsize=15, alpha=.75, weight='bold')
fig.text(x=0.15, y=0.05,
         s='Source: Pagina oficial de COVID and Registro Nacional de Personas Naturales',
         fontsize=10, alpha=.75)
fig.patch.set_facecolor('white')

# Proxy artists for the legend, using the first two colours of the cycle.
blue = mlines.Line2D([], [], color='#2b728f',
                     markersize=15, label='Official and Suspicious Deaths')
pink = mlines.Line2D([], [], color='#cc2975',
                     markersize=15, label='Official Deaths')
fig.legend(handles=[pink, blue], loc=[0.37, 0.86])
plt.savefig('COVID_muertes_eng.png', bbox_inches='tight', dpi=600)
# Build per-cause daily counts and their 2015-2019 per-calendar-day baseline.
data['day'] = data.date.dt.day
data['month'] = data.date.dt.month

# Add categories: flag each record by keyword(s) found in CAUSADEF.
# One regex per category; na=False makes records without a cause count as 0
# instead of propagating missing values.
cause_patterns = {
    'diabetes': 'DIAB',
    'cardio': 'CARD|INFAR',
    'neumonia': 'NEUM|RESP',
    'cancer': 'CANCE',
}
for column, pattern in cause_patterns.items():
    data[column] = data.CAUSADEF.str.contains(pattern, na=False).astype(int)

value_columns = ['freq', 'diabetes', 'cardio', 'neumonia', 'cancer']

# Baseline: per (day, month) sums over 2015-2019, divided by the 5 years.
year_2015_2019 = (data['year'] >= 2015) & (data['year'] <= 2019)
mean = (
    data[year_2015_2019]
    .pivot_table(index=['day', 'month'], values=value_columns, aggfunc='sum')
    .reset_index()
)
mean[value_columns] = mean[value_columns] / 5
# 29 February occurs in only one year of 2015-2019 (2016), so undo the
# division by 5 for that calendar day.
is_feb_29 = (mean['day'] == 29) & (mean['month'] == 2)
mean.loc[is_feb_29, value_columns] = mean.loc[is_feb_29, value_columns] * 5

# Merging the data: reshape both tables to long form (one row per variable)
# and join the baseline onto the daily counts.
mean = pd.melt(mean, id_vars=['day', 'month'],
               value_vars=value_columns,
               value_name='mean')
df = pd.melt(data, id_vars=['date', 'day', 'month'],
             value_vars=value_columns,
             value_name='cases')
df = df.pivot_table(index=['date', 'day', 'month', 'variable'],
                    values=['cases'],
                    aggfunc='sum').reset_index()
df = df.merge(mean, on=['day', 'month', 'variable'])
# Figure: 2x2 grid, one panel per cause, each showing the 7-row rolling mean
# of the `cases` and `mean` columns of `df` from 2020-01-01 on; saved to
# causas_eng.png.
plt.style.use('bmh')
fig, axs = plt.subplots(2, 2, sharex=True, figsize=(11, 8))
myFmt = mdates.DateFormatter('%b')
cause = ['cardio', 'neumonia', 'cancer', 'diabetes']
x = [0, 0, 1, 1]
y = [0, 1, 0, 1]
title = ['Cardio', 'Pneumonia and respiratory arrest', 'Cancer', 'Diabetes']

# Sort once; the ordering does not depend on the panel being drawn.
ordered = df.sort_values(by=['variable', 'date'])

# Distinct loop names keep the four lists above intact, so this cell can be
# re-run without the lists having been overwritten by their own elements.
for cause_name, panel_title, row, col in zip(cause, title, x, y):
    selected = (ordered['variable'] == cause_name) & (ordered['date'] >= '2020-01-01')
    # Copy so the rolling mean is written to an independent frame rather
    # than to a slice of `ordered`.
    graph = ordered[selected].copy()
    graph[['cases', 'mean']] = graph[['cases', 'mean']].rolling(window=7).mean()

    panel = axs[row, col]
    panel.set_prop_cycle(color=['#2b728f', '#cc2975', 'blue'])
    panel.plot(graph['date'], graph[['cases', 'mean']])
    panel.set_title(panel_title)
    panel.xaxis.set_major_formatter(myFmt)
    # Thick red bottom/left spines; the top spine is drawn in white.
    for axis in ['bottom', 'left']:
        panel.spines[axis].set_linewidth(2)
        panel.spines[axis].set_color("#FF3636")
    for axis in ['top']:
        panel.spines[axis].set_linewidth(2)
        panel.spines[axis].set_color("white")

# Title, then the source line and the two-line note.
fig.text(x=0.26,
         y=1,
         s='Daily deaths according to reported causes in 2020',
         fontsize=15, alpha=.75, weight='bold')
fig.text(x=0.15,
         y=0.07,
         s='Source: Registro Nacional de Personas Naturales',
         fontsize=10, alpha=.75)
fig.text(x=0.15,
         y=0.05,
         s='Note: Some deaths have not yet been recorded, especially for more recent dates',
         fontsize=10, alpha=.75)
fig.text(x=0.15,
         y=0.03,
         s='for this reason a fall is noticed in the last dates',
         fontsize=10, alpha=.75)
fig.patch.set_facecolor('white')

# Proxy artists for the legend, using the first two colours of the cycle.
blue = mlines.Line2D([], [], color='#2b728f',
                     markersize=15, label='Daily deaths in 2020')
pink = mlines.Line2D([], [], color='#cc2975',
                     markersize=15, label='5-year daily average')
fig.legend(handles=[pink, blue], loc=[0.37, 0.89])
plt.savefig('causas_eng.png', bbox_inches='tight', dpi=600)