COVID-19

Visualizaciones Interactivas con Python Bokeh


Contenido

  1. Introducción
  2. Paquetes Python
  3. Prep. de Datos
  4. Totales
  5. México
  6. Mundo
  7. Top 30 Casos
  8. Top 30 Decesos
  9. Coméntanos

Introducción

Este es un proyecto 100% reproducible en el que creamos visualizaciones interactivas sobre la evolución de la pandemia del Coronavirus COVID-19.
Los datos son tomados de "Our World in Data: Coronavirus Source Data" y se actualizan cada día.


Paquetes Python

In [2]:
import datetime
import pandas as pd
import numpy as np
import itertools
from os import listdir
from bokeh import plotting
from bokeh.io import output_notebook, show
from bokeh.models import HoverTool, FactorRange, CustomJS, Div, Row, ImageURL
from bokeh.plotting import figure, output_file, show
from bokeh.layouts import gridplot
from IPython.display import display, HTML
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category20c
output_notebook()
Loading BokehJS ...
In [3]:
# Stamp the notebook output with the local date and time of this run.
print(f"Actualizado al: {datetime.datetime.now():%Y-%b-%d %H:%M}")
Actualizado al: 2020-Sep-21 11:11

Preparación de Datos

In [4]:
# Download the Our World in Data CSV and parse its ISO-formatted 'date'
# column. For offline work, read a local copy ("full_data.csv") instead.
data = pd.read_csv(
    "https://covid.ourworldindata.org/data/ecdc/full_data.csv"
).assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))

Totales

Casos en el Mundo

In [5]:
# Largest cumulative case count reported on the 'World' rows.
a = int(data.loc[data['location'] == 'World', 'total_cases'].max())

31,091,469

Decesos en el Mundo

In [6]:
# Largest cumulative death count reported on the 'World' rows.
b = int(data.loc[data['location'] == 'World', 'total_deaths'].max())

961,352

Casos en México

In [7]:
# Largest cumulative case count reported on the 'Mexico' rows.
c = int(data.loc[data['location'] == 'Mexico', 'total_cases'].max())

697,663

Decesos en México

In [8]:
# Largest cumulative death count reported on the 'Mexico' rows.
d = int(data.loc[data['location'] == 'Mexico', 'total_deaths'].max())

73,493

Casos y Decesos en México

In [9]:
# Rows for Mexico dated 2020-03-12 or later; feeds vis_1 and vis_2.
dataM = data.loc[(data['location'] == 'Mexico')
                 & (data['date'] >= '2020-03-12')]
In [10]:
def vis_1():
    """Show Mexico's cumulative cases and deaths as an interactive chart.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Builds a datetime-axis figure from the module-level ``dataM`` frame,
    draws a line with hollow circle markers per metric (cases in red,
    deaths in black), attaches one hover tooltip to each line and
    displays the figure with ``show``.
    """
    mexico = plotting.ColumnDataSource(dataM)

    fig = figure(title="",
                 x_axis_label='Fecha',
                 y_axis_label='Número Casos',
                 x_axis_type='datetime',
                 plot_width=900,
                 plot_height=450,
                 sizing_mode='scale_width',
                 tools="box_zoom, reset, wheel_zoom, save")

    # (data column, glyph color, tooltip caption) for each plotted metric.
    series = (
        ('total_cases', 'red', "Casos Totales"),
        ('total_deaths', 'black', "Decesos Totales"),
    )

    for column, shade, caption in series:
        trace = fig.line(x='date',
                         y=column,
                         line_width=2,
                         source=mexico,
                         color=shade)

        fig.circle(x='date',
                   y=column,
                   size=6,
                   source=mexico,
                   fill_color=None,
                   color=shade)

        # The tooltip is bound to the line renderer only.
        fig.add_tools(
            HoverTool(renderers=[trace],
                      tooltips=[
                          ("País", "@location"),
                          ("Fecha", "@date{%F}"),
                          (caption, f"@{column}"),
                      ],
                      formatters={'@date': 'datetime'}))

    # No drag tool is active until the user picks one from the toolbar.
    fig.toolbar.active_drag = None
    # Plain (non-scientific) tick labels on the left axis.
    fig.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    fig.title.text_font_size = '3vw'
    for axis in (fig.xaxis, fig.yaxis):
        axis.axis_label_text_font_size = "2vw"
    fig.xaxis.major_label_text_font_size = "1.3vw"
    fig.yaxis.major_label_text_font_size = "1vw"

    show(fig)

Casos y Decesos Totales en México

In [12]:
def vis_2():
    """Show Mexico's daily new cases and deaths as an interactive chart.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Builds a datetime-axis figure from the module-level ``dataM`` frame,
    draws a line with hollow circle markers per metric (new cases in
    red, new deaths in black), attaches one hover tooltip to each line
    and displays the figure with ``show``.
    """
    mexico = plotting.ColumnDataSource(dataM)

    fig = figure(title="",
                 x_axis_label='Fecha',
                 y_axis_label='Número Casos',
                 x_axis_type='datetime',
                 plot_width=900,
                 plot_height=450,
                 sizing_mode='scale_width',
                 tools="box_zoom, reset, wheel_zoom, save")

    # (data column, glyph color, tooltip caption) for each plotted metric.
    series = (
        ('new_cases', 'red', "Casos Diarios"),
        ('new_deaths', 'black', "Decesos Diarios"),
    )

    for column, shade, caption in series:
        trace = fig.line(x='date',
                         y=column,
                         line_width=2,
                         source=mexico,
                         color=shade)

        fig.circle(x='date',
                   y=column,
                   size=6,
                   source=mexico,
                   fill_color=None,
                   color=shade)

        # The tooltip is bound to the line renderer only.
        fig.add_tools(
            HoverTool(renderers=[trace],
                      tooltips=[
                          ("País", "@location"),
                          ("Fecha", "@date{%F}"),
                          (caption, f"@{column}"),
                      ],
                      formatters={'@date': 'datetime'}))

    # No drag tool is active until the user picks one from the toolbar.
    fig.toolbar.active_drag = None
    # Plain (non-scientific) tick labels on the left axis.
    fig.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    fig.title.text_font_size = '3vw'
    for axis in (fig.xaxis, fig.yaxis):
        axis.axis_label_text_font_size = "2vw"
    fig.xaxis.major_label_text_font_size = "1.3vw"
    fig.yaxis.major_label_text_font_size = "1vw"

    show(fig)

Casos y Decesos Diarios en México

Casos y Decesos Totales en los Estados

Los datos son extraídos diariamente del portal oficial de datos sobre Covid-19 del gobierno de México.
Covid-19 México

In [14]:
# Names of every '.csv' entry in the local folder with the per-state data.
csv_s = list(
    filter(
        lambda name: name.endswith('.csv'),
        listdir(
            '/home/pakin/Documents/desarrolloWeb/PakinV5/me/cov19-estados/')))
In [15]:
# Per-state confirmed cases: read the first listed file whose name
# contains 'Confirmados' (IndexError if there is none).
data_edosC = pd.read_csv(
    f"/home/pakin/Documents/desarrolloWeb/PakinV5/me/cov19-estados/"
    f"{[name for name in csv_s if 'Confirmados' in name][0]}")
In [16]:
# ``data_edosC`` is loaded by the preceding cell and is not modified here,
# so it is reshaped directly instead of being read from disk a second time.

# Wide to long: every column other than the three id columns becomes a
# (FECHA, CASOS) row.
data_edosCT = data_edosC.melt(id_vars=["cve_ent", "poblacion", "nombre"],
                              var_name="FECHA",
                              value_name="CASOS")

# Long to one row per FECHA with one column per 'nombre' value.
data_edosCTT = data_edosCT.pivot(
    index='FECHA', columns='nombre',
    values='CASOS').rename_axis(None, axis=1).reset_index()

# FECHA labels are day-month-year strings; parse them before sorting so the
# order is chronological rather than lexicographic.
data_edosCTT['FECHA'] = pd.to_datetime(data_edosCTT['FECHA'],
                                       format='%d-%m-%Y')

data_edosCTT = data_edosCTT.sort_values(by=['FECHA'])
data_edosCTT = data_edosCTT[(data_edosCTT['FECHA'] >= '2020-03-01')]

# Underscored column names can be referenced as '@name' in Bokeh tooltips.
data_edosCTT.columns = data_edosCTT.columns.str.strip().str.replace(' ', '_')

# Turn the per-date values into running totals (every column except FECHA).
# NOTE(review): the running total starts at the first row on/after
# 2020-03-01, so values dated earlier are not included -- confirm intended.
colms = data_edosCTT.columns.to_list()[1:]
data_edosCTT.loc[:, colms] = data_edosCTT.loc[:, colms].cumsum(axis=0)

# Drop the 'Nacional' column; ``cols`` is reused by the deaths cell below.
cols = [col for col in data_edosCTT.columns if col not in ['Nacional']]
data_edosCTT = data_edosCTT[cols]

# Split the remaining columns into two groups, one per chart: the first 15
# after FECHA, and the rest.
estados_1 = data_edosCTT.columns.tolist()[1:16]
estados_2 = data_edosCTT.columns.tolist()[16:]

n1 = len(estados_1)
n2 = len(estados_2)


def color_gen1():
    """Yield the ``n1``-color Category20c palette in an endless cycle."""
    yield from itertools.cycle(Category20c[n1])


color1 = color_gen1()


def color_gen2():
    """Yield the ``n2``-color Category20c palette in an endless cycle."""
    yield from itertools.cycle(Category20c[n2])


color2 = color_gen2()
In [17]:
def vis_3():
    """Show cumulative confirmed cases for the first group of states.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Reads the module-level ``data_edosCTT`` frame and draws, for every
    column named in ``estados_1``, a line plus hollow circle markers in a
    color taken from the shared ``color1`` generator. Each state's
    markers carry a hover tooltip, clicking a legend entry hides that
    state's glyphs, and the figure is displayed with ``show``.
    """
    source = ColumnDataSource(data_edosCTT)

    p = figure(title="",
               x_axis_type="datetime",
               plot_width=900,
               plot_height=450,
               sizing_mode='scale_width',
               toolbar_location='right',
               tools="box_zoom, reset, wheel_zoom, save")

    # ``estados_1`` is listed first so zip stops before pulling an extra
    # color from the shared ``color1`` generator.
    for estado, c in zip(estados_1, color1):
        p.line('FECHA',
               estado,
               source=source,
               color=c,
               line_width=2,
               legend_label=estado)

        markers = p.circle('FECHA',
                           estado,
                           source=source,
                           color=c,
                           size=6,
                           fill_color=None,
                           legend_label=estado)

        # The tooltip is bound to the circle markers only.
        p.add_tools(
            HoverTool(toggleable=False,
                      renderers=[markers],
                      tooltips=[('Estado', estado), ('Fecha', '@FECHA{%F}'),
                                ('Casos Acumulados', f'@{estado}')],
                      formatters={'@FECHA': 'datetime'}))

    # No drag tool is active until the user picks one from the toolbar.
    p.toolbar.active_drag = None

    p.legend.click_policy = "hide"
    p.legend.spacing = 15
    p.legend.padding = 20
    p.legend.label_text_font_size = "1vw"
    p.legend.location = 'top_left'

    # Plain (non-scientific) tick labels on the left axis.
    p.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    p.title.text_font_size = '3vw'
    p.xaxis.axis_label_text_font_size = "2vw"
    p.yaxis.axis_label_text_font_size = "2vw"
    p.xaxis.major_label_text_font_size = "1.3vw"
    p.yaxis.major_label_text_font_size = "1vw"

    show(p)

Casos Totales en Estados
Parte 1

In [19]:
def vis_4():
    """Show cumulative confirmed cases for the second group of states.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Reads the module-level ``data_edosCTT`` frame and draws, for every
    column named in ``estados_2``, a line plus hollow circle markers in a
    color taken from the shared ``color2`` generator. Each state's
    markers carry a hover tooltip, clicking a legend entry hides that
    state's glyphs, and the figure is displayed with ``show``.
    """
    source = ColumnDataSource(data_edosCTT)

    p = figure(title="",
               x_axis_type="datetime",
               plot_width=800,
               plot_height=450,
               sizing_mode='scale_width',
               toolbar_location='right',
               tools="box_zoom, reset, wheel_zoom, save")

    # ``estados_2`` is listed first so zip stops before pulling an extra
    # color from the shared ``color2`` generator.
    for estado, c in zip(estados_2, color2):
        p.line('FECHA',
               estado,
               source=source,
               color=c,
               line_width=2,
               legend_label=estado)

        markers = p.circle('FECHA',
                           estado,
                           source=source,
                           color=c,
                           size=6,
                           fill_color=None,
                           legend_label=estado)

        # The tooltip is bound to the circle markers only.
        p.add_tools(
            HoverTool(toggleable=False,
                      renderers=[markers],
                      tooltips=[('Estado', estado), ('Fecha', '@FECHA{%F}'),
                                ('Casos Acumulados', f'@{estado}')],
                      formatters={'@FECHA': 'datetime'}))

    p.legend.spacing = 15
    p.legend.padding = 20

    # No drag tool is active until the user picks one from the toolbar.
    p.toolbar.active_drag = None
    p.legend.click_policy = "hide"

    p.legend.label_text_font_size = "1vw"
    p.legend.location = 'top_left'

    # Plain (non-scientific) tick labels on the left axis.
    p.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    p.title.text_font_size = '3vw'
    p.xaxis.axis_label_text_font_size = "2vw"
    p.yaxis.axis_label_text_font_size = "2vw"
    p.xaxis.major_label_text_font_size = "1.3vw"
    p.yaxis.major_label_text_font_size = "1vw"

    show(p)

Casos Totales en Estados
Parte 2

In [21]:
# Per-state deaths: read the first listed file whose name contains
# 'Defunciones' (IndexError if there is none).
data_edosD = pd.read_csv(
    f"/home/pakin/Documents/desarrolloWeb/PakinV5/me/cov19-estados/"
    f"{[name for name in csv_s if 'Defunciones' in name][0]}")

# Wide to long: every column other than the three id columns becomes a
# (FECHA, CASOS) row.
data_edosDT = data_edosD.melt(var_name="FECHA",
                              value_name="CASOS",
                              id_vars=["cve_ent", "poblacion", "nombre"])

# One row per FECHA and one column per 'nombre', with FECHA parsed from
# day-month-year text, sorted chronologically and cut at 2020-03-01.
data_edosDTT = (
    data_edosDT
    .pivot(index='FECHA', columns='nombre', values='CASOS')
    .rename_axis(None, axis=1)
    .reset_index()
    .assign(FECHA=lambda frame: pd.to_datetime(frame['FECHA'],
                                               format='%d-%m-%Y'))
    .sort_values(by=['FECHA'])
    .loc[lambda frame: frame['FECHA'] >= '2020-03-01']
)

# Underscored column names can be referenced as '@name' in Bokeh tooltips.
data_edosDTT.columns = data_edosDTT.columns.str.strip().str.replace(' ', '_')

# Running totals for every column except the leading FECHA column.
colms = list(data_edosDTT.columns[1:])
data_edosDTT.loc[:, colms] = data_edosDTT.loc[:, colms].cumsum()

# Keep the same column selection as the confirmed-cases frame (``cols`` is
# defined in the confirmed-cases cell).
data_edosDTT = data_edosDTT[cols]
In [22]:
def vis_5():
    """Show cumulative deaths for the first group of states.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Reads the module-level ``data_edosDTT`` frame and draws, for every
    column named in ``estados_1``, a line plus hollow circle markers in a
    color taken from the shared ``color1`` generator. Each state's
    markers carry a hover tooltip, clicking a legend entry hides that
    state's glyphs, and the figure is displayed with ``show``.
    """
    source = ColumnDataSource(data_edosDTT)

    p = figure(title="",
               x_axis_type="datetime",
               plot_width=900,
               plot_height=450,
               sizing_mode='scale_width',
               toolbar_location='right',
               tools="box_zoom, reset, wheel_zoom, save")

    # ``estados_1`` is listed first so zip stops before pulling an extra
    # color from the shared ``color1`` generator.
    for estado, c in zip(estados_1, color1):
        p.line('FECHA',
               estado,
               source=source,
               color=c,
               line_width=2,
               legend_label=estado)

        markers = p.circle('FECHA',
                           estado,
                           source=source,
                           color=c,
                           size=6,
                           fill_color=None,
                           legend_label=estado)

        # The tooltip is bound to the circle markers only.
        p.add_tools(
            HoverTool(toggleable=False,
                      renderers=[markers],
                      tooltips=[('Estado', estado), ('Fecha', '@FECHA{%F}'),
                                ('Decesos Acumulados', f'@{estado}')],
                      formatters={'@FECHA': 'datetime'}))

    p.legend.click_policy = "hide"
    p.legend.spacing = 15
    p.legend.padding = 20

    # No drag tool is active until the user picks one from the toolbar.
    p.toolbar.active_drag = None

    p.legend.label_text_font_size = "1vw"
    p.legend.location = 'top_left'

    # Plain (non-scientific) tick labels on the left axis.
    p.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    p.title.text_font_size = '3vw'
    p.xaxis.axis_label_text_font_size = "2vw"
    p.yaxis.axis_label_text_font_size = "2vw"
    p.xaxis.major_label_text_font_size = "1.3vw"
    p.yaxis.major_label_text_font_size = "1vw"

    show(p)

Decesos Totales en Estados
Parte 1

In [24]:
def vis_6():
    """Show cumulative deaths for the second group of states.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Reads the module-level ``data_edosDTT`` frame and draws, for every
    column named in ``estados_2``, a line plus hollow circle markers in a
    color taken from the shared ``color2`` generator. Each state's
    markers carry a hover tooltip, clicking a legend entry hides that
    state's glyphs, and the figure is displayed with ``show``.
    """
    source = ColumnDataSource(data_edosDTT)

    p = figure(title="",
               x_axis_type="datetime",
               plot_width=800,
               plot_height=450,
               sizing_mode='scale_width',
               toolbar_location='right',
               tools="box_zoom, reset, wheel_zoom, save")

    # ``estados_2`` is listed first so zip stops before pulling an extra
    # color from the shared ``color2`` generator.
    for estado, c in zip(estados_2, color2):
        p.line('FECHA',
               estado,
               source=source,
               color=c,
               line_width=2,
               legend_label=estado)

        markers = p.circle('FECHA',
                           estado,
                           source=source,
                           color=c,
                           size=6,
                           fill_color=None,
                           legend_label=estado)

        # The tooltip is bound to the circle markers only.
        p.add_tools(
            HoverTool(toggleable=False,
                      renderers=[markers],
                      tooltips=[('Estado', estado), ('Fecha', '@FECHA{%F}'),
                                ('Decesos Acumulados', f'@{estado}')],
                      formatters={'@FECHA': 'datetime'}))

    p.legend.spacing = 15
    p.legend.padding = 20

    # No drag tool is active until the user picks one from the toolbar.
    p.toolbar.active_drag = None
    p.legend.click_policy = "hide"

    p.legend.label_text_font_size = "1vw"
    p.legend.location = 'top_left'

    # Plain (non-scientific) tick labels on the left axis.
    p.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    p.title.text_font_size = '3vw'
    p.xaxis.axis_label_text_font_size = "2vw"
    p.yaxis.axis_label_text_font_size = "2vw"
    p.xaxis.major_label_text_font_size = "1.3vw"
    p.yaxis.major_label_text_font_size = "1vw"

    show(p)

Decesos Totales en Estados
Parte 2

Casos y Decesos en Diferentes Países

In [26]:
# Size of the country palette and of the ranking slice taken below.
n = 18


def color_gen():
    """Yield the ``n``-color Category20c palette in an endless cycle."""
    for shade in itertools.cycle(Category20c[n]):
        yield shade


color = color_gen()

# Locations ranked by their maximum 'total_cases'; the top-ranked row is
# skipped and the next ``n - 1`` names are kept.
paises = list(
    data.groupby('location').max().sort_values(
        'total_cases', ascending=False).iloc[1:n].index)

# Same list object under a second name, as read by the country charts.
col_names = paises
In [27]:
def vis_7():
    """Show cumulative cases over time for the countries in ``paises``.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Pivots the module-level ``data`` frame to one 'total_cases' column
    per location, keeps dates from 2020-02-01 onward and the locations
    listed in ``paises``, then draws a line plus hollow circle markers
    per country. Each country's markers carry a hover tooltip, clicking
    a legend entry hides that country's glyphs, and the figure is
    displayed with ``show``.

    Colors come from a palette cycle created on every call, so a given
    country gets the same color on each run and in every chart that
    starts from the same palette.
    """
    dataTCDP = data.pivot(index='date',
                          columns='location',
                          values='total_cases').rename_axis(None, axis=1)

    dataTCDP = dataTCDP.loc[(dataTCDP.index >= '2020-02-01')]

    dataTCDP17 = dataTCDP.loc[:, paises]

    # Underscored column names can be referenced as '@name' in tooltips.
    dataTCDP17.columns = dataTCDP17.columns.str.strip().str.replace(' ', '_')

    output_notebook()

    source = ColumnDataSource(dataTCDP17)

    p = figure(title="",
               x_axis_label='Fecha',
               y_axis_label='Número de Casos',
               x_axis_type="datetime",
               plot_width=900,
               plot_height=450,
               sizing_mode='scale_width',
               tools="box_zoom, reset, wheel_zoom, save")

    # The shared module-level ``color`` generator has ``n`` colors but only
    # ``len(paises)`` are consumed per chart, so reusing it shifts every
    # country's color on the next chart or re-run. Start a fresh cycle.
    palette = itertools.cycle(Category20c[n])

    # ``col`` is the underscored column key; ``pais`` is the display name.
    for col, c, pais in zip(dataTCDP17.columns, palette, paises):

        p.line('date',
               col,
               source=source,
               color=c,
               line_width=2,
               legend_label=pais)

        markers = p.circle('date',
                           col,
                           source=source,
                           color=c,
                           size=6,
                           line_width=0.5,
                           fill_color=None,
                           legend_label=pais)

        # The tooltip is bound to the circle markers only and shows the
        # same country name as the legend.
        p.add_tools(
            HoverTool(toggleable=False,
                      renderers=[markers],
                      tooltips=[
                          ('País', pais),
                          ('Fecha', '@date{%F}'),
                          ('Casos Totales', f'@{col}'),
                      ],
                      formatters={'@date': 'datetime'}))

    p.legend.spacing = 10
    p.legend.padding = 15

    # No drag tool is active until the user picks one from the toolbar.
    p.toolbar.active_drag = None
    p.legend.click_policy = "hide"

    p.legend.label_text_font_size = "1vw"
    p.legend.location = 'top_left'

    # Plain (non-scientific) tick labels on the left axis.
    p.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    p.title.text_font_size = '3vw'
    p.xaxis.axis_label_text_font_size = "2vw"
    p.yaxis.axis_label_text_font_size = "2vw"
    p.xaxis.major_label_text_font_size = "1.3vw"
    p.yaxis.major_label_text_font_size = "1vw"

    show(p)

Casos Totales Diferentes Países

Loading BokehJS ...
In [29]:
def vis_8():
    """Show cumulative deaths over time for the countries in ``paises``.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Pivots the module-level ``data`` frame to one 'total_deaths' column
    per location, keeps dates from 2020-02-01 onward and the locations
    listed in ``paises``, then draws a line plus hollow circle markers
    per country. Each country's markers carry a hover tooltip, clicking
    a legend entry hides that country's glyphs, and the figure is
    displayed with ``show``.

    Colors come from a palette cycle created on every call, so a given
    country gets the same color on each run and in every chart that
    starts from the same palette.
    """
    dataTDDP = data.pivot(index='date',
                          columns='location',
                          values='total_deaths').rename_axis(None, axis=1)
    dataTDDP = dataTDDP.loc[(dataTDDP.index >= '2020-02-01')]

    dataTDDP17 = dataTDDP.loc[:, paises]

    # Underscored column names can be referenced as '@name' in tooltips.
    dataTDDP17.columns = dataTDDP17.columns.str.strip().str.replace(' ', '_')

    output_notebook()

    source = ColumnDataSource(dataTDDP17)

    p = figure(title="",
               x_axis_label='Fecha',
               y_axis_label='Número de Decesos',
               x_axis_type="datetime",
               plot_width=900,
               plot_height=450,
               sizing_mode='scale_width',
               tools="box_zoom, reset, wheel_zoom, save")

    # The shared module-level ``color`` generator has ``n`` colors but only
    # ``len(paises)`` are consumed per chart, so reusing it would start
    # this chart at whatever offset earlier charts left behind. Start a
    # fresh cycle instead.
    palette = itertools.cycle(Category20c[n])

    # ``col`` is the underscored column key; ``pais`` is the display name.
    for col, c, pais in zip(dataTDDP17.columns, palette, paises):

        p.line('date',
               col,
               source=source,
               color=c,
               line_width=2,
               legend_label=pais)

        markers = p.circle('date',
                           col,
                           source=source,
                           color=c,
                           size=6,
                           line_width=0.5,
                           fill_color=None,
                           legend_label=pais)

        # The tooltip is bound to the circle markers only and shows the
        # same country name as the legend.
        p.add_tools(
            HoverTool(toggleable=False,
                      renderers=[markers],
                      tooltips=[
                          ('País', pais),
                          ('Fecha', '@date{%F}'),
                          ('Decesos Totales', f'@{col}'),
                      ],
                      formatters={'@date': 'datetime'}))

    p.legend.spacing = 10
    p.legend.padding = 15

    # No drag tool is active until the user picks one from the toolbar.
    p.toolbar.active_drag = None
    p.legend.click_policy = "hide"

    p.legend.label_text_font_size = "1vw"
    p.legend.location = 'top_left'

    # Plain (non-scientific) tick labels on the left axis.
    p.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes.
    p.title.text_font_size = '3vw'
    p.xaxis.axis_label_text_font_size = "2vw"
    p.yaxis.axis_label_text_font_size = "2vw"
    p.xaxis.major_label_text_font_size = "1.3vw"
    p.yaxis.major_label_text_font_size = "1vw"

    show(p)

Decesos Totales Diferentes Países

Loading BokehJS ...

Top 30 Casos Totales

In [31]:
# Per-location maxima ranked by 'total_cases'; the top-ranked row is
# skipped and the next 30 are kept, with 'location' back as a column.
dataTC = data.groupby('location').max().sort_values(
    'total_cases', ascending=False).iloc[1:31]
dataTC = dataTC.reset_index()
In [32]:
def vis_9():
    """Show a bar chart of total cases for the locations in ``dataTC``.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Uses the 'location' column of the module-level ``dataTC`` frame as a
    categorical x axis, draws one bar per location with height
    'total_cases', attaches a hover tooltip to the bars and displays the
    figure with ``show``.
    """
    ranking = plotting.ColumnDataSource(dataTC)
    countries = list(dataTC['location'])

    fig = figure(title='',
                 x_axis_label='',
                 y_axis_label='',
                 x_range=FactorRange(factors=countries),
                 plot_width=900,
                 plot_height=450,
                 sizing_mode='scale_width',
                 tools="box_zoom, reset, wheel_zoom, save")

    bars = fig.vbar(x='location',
                    top='total_cases',
                    bottom=0,
                    width=0.5,
                    source=ranking,
                    color='#B22222')

    hover = HoverTool(
        renderers=[bars],
        tooltips=[
            ("País", "@location"),
            ("Casos Totales", "@total_cases"),
        ],
    )
    fig.add_tools(hover)

    # No drag tool is active until the user picks one from the toolbar.
    fig.toolbar.active_drag = None
    # Plain (non-scientific) tick labels on the left axis.
    fig.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes; both axes share the same sizes here.
    fig.title.text_font_size = '3vw'
    for axis in (fig.xaxis, fig.yaxis):
        axis.axis_label_text_font_size = "2vw"
        axis.major_label_text_font_size = "1.3vw"

    # Tilt the category labels by pi/5 radians.
    fig.xaxis.major_label_orientation = np.pi / 5

    show(fig)

Top 30 Países Casos Totales

Top 30 Decesos Totales

In [34]:
# Per-location maxima ranked by 'total_deaths'; the top-ranked row is
# skipped and the next 30 are kept, with 'location' back as a column.
dataTD = data.groupby('location').max().sort_values(
    'total_deaths', ascending=False).iloc[1:31]
dataTD = dataTD.reset_index()
In [35]:
def vis_10():
    """Show a bar chart of total deaths for the locations in ``dataTD``.

    Author: Francisco Jaramillo
    https://www.pakin.lat/

    Uses the 'location' column of the module-level ``dataTD`` frame as a
    categorical x axis, draws one bar per location with height
    'total_deaths', attaches a hover tooltip to the bars and displays
    the figure with ``show``.
    """
    ranking = plotting.ColumnDataSource(dataTD)
    countries = list(dataTD['location'])

    fig = figure(title='',
                 x_axis_label='',
                 y_axis_label='',
                 x_range=FactorRange(factors=countries),
                 plot_width=900,
                 plot_height=450,
                 sizing_mode='scale_width',
                 tools="box_zoom, reset, wheel_zoom, save")

    bars = fig.vbar(x='location',
                    top='total_deaths',
                    bottom=0,
                    width=0.5,
                    source=ranking,
                    color='#000000')

    hover = HoverTool(
        renderers=[bars],
        tooltips=[
            ("País", "@location"),
            ("Decesos Totales", "@total_deaths"),
        ],
    )
    fig.add_tools(hover)

    # No drag tool is active until the user picks one from the toolbar.
    fig.toolbar.active_drag = None
    # Plain (non-scientific) tick labels on the left axis.
    fig.left[0].formatter.use_scientific = False

    # Viewport-relative font sizes; both axes share the same sizes here.
    fig.title.text_font_size = '3vw'
    for axis in (fig.xaxis, fig.yaxis):
        axis.axis_label_text_font_size = "2vw"
        axis.major_label_text_font_size = "1.3vw"

    # Tilt the category labels by pi/5 radians.
    fig.xaxis.major_label_orientation = np.pi / 5

    show(fig)

Top 30 Países Decesos Totales

Regresar al inicio


Esperamos con gusto tus comentarios y sugerencias: