2020-03-28 11:37:54 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
|
|
Project: Analyse worldwide COVID-19 Data and provide graphs etc.
|
|
|
|
|
|
|
|
@author Patrick Müller
|
|
|
|
"""
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
import tkinter as tk
|
2020-03-31 21:31:35 +00:00
|
|
|
import random
|
2020-03-28 11:37:54 +00:00
|
|
|
|
|
|
|
"""
|
|
|
|
Fields in csv:
|
|
|
|
dateRep
|
|
|
|
day
|
|
|
|
month
|
|
|
|
year
|
|
|
|
cases
|
|
|
|
deaths
|
|
|
|
countriesAndTerritories
|
|
|
|
geoId
|
|
|
|
countryterritoryCode
|
2020-07-20 11:44:07 +00:00
|
|
|
popData2019
|
2020-03-28 11:37:54 +00:00
|
|
|
"""
|
2020-03-28 15:55:34 +00:00
|
|
|
from datetime import datetime, timedelta
|
2020-03-28 11:37:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Analyser:
    """
    Load the COVID-19 statistics CSV and answer queries / render graphs from it.

    The parsed data lives in ``self.df``. On top of the CSV columns the
    constructor adds three derived columns per row: ``totalCases`` and
    ``totalDeaths`` (running sums per country in date order) and ``deathRate``
    (``totalDeaths / totalCases * 100``).

    Every ``get...Graph`` method saves its figure below the ``graphs``
    directory and returns the path string that was handed to ``savefig``.
    NOTE(review): that path carries no file extension; matplotlib is expected
    to append its default image extension when saving - confirm before opening
    the returned path directly.
    """

    def __init__(self, csvPath='statsfile.csv'):
        """
        Read the statistics file and pre-compute the running totals.

        :param csvPath: Anything ``pandas.read_csv`` accepts (path or file-like object).
                        Defaults to the file name that was previously hard-coded.
        """
        # Pandas Settings (process-wide display options)
        pd.set_option('display.max_rows', 50)
        pd.set_option('display.max_columns', 10)

        frame = pd.read_csv(csvPath)
        frame['dateRep'] = pd.to_datetime(frame['dateRep'], format='%d/%m/%Y')
        self.df = self._addRunningTotals(frame)

        print('DEBUG: Analyser initialized')

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _today() -> str:
        """Return the current local date as ``YYYY-MM-DD`` (evaluated on every call)."""
        return datetime.now().strftime('%Y-%m-%d')

    @staticmethod
    def _addRunningTotals(frame):
        """Add ``totalCases``, ``totalDeaths`` and ``deathRate`` to ``frame`` and return it."""
        # One grouped cumulative sum replaces a per-country filter/update loop.
        # The sums are computed in date order and written back by index label,
        # so the row order of ``frame`` itself is left untouched.
        perCountry = frame.sort_values('dateRep', kind='stable').groupby(
            'countriesAndTerritories', sort=False)
        runningCases = perCountry['cases'].cumsum()
        runningDeaths = perCountry['deaths'].cumsum()

        # Undefined entries (missing input, 0 deaths / 0 cases) are stored as 0.
        frame['totalCases'] = runningCases.fillna(0)
        frame['totalDeaths'] = runningDeaths.fillna(0)
        frame['deathRate'] = (runningDeaths / runningCases * 100).fillna(0)
        return frame

    def _countryData(self, country):
        """Return the rows of ``country`` as a new frame sorted by ``dateRep``."""
        rows = self.df[self.df['countriesAndTerritories'] == country]
        return rows.sort_values('dateRep')

    def _sumUntil(self, country, column, date):
        """Sum ``column`` over all rows of ``country`` dated on or before ``date``."""
        if date is None:
            date = self._today()
        selected = (self.df['countriesAndTerritories'] == country) & (self.df['dateRep'] <= date)
        return self.df.loc[selected, column].sum()

    def _renderGraph(self, title, fileStem, country, plotDpi, showPlot, draw) -> str:
        """
        Create a figure, let ``draw(ax)`` fill it, optionally show it, save it and close it.

        :param title: Title placed on the axes
        :param fileStem: First part of the file name (e.g. ``casesGraph``)
        :param country: Country name, used as second part of the file name
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to open a blocking plot window before saving
        :param draw: Callable that receives the axes and plots onto it
        :return: The path that was passed to ``savefig``
        """
        import os  # local import: only this helper needs it

        fig = plt.figure(dpi=plotDpi)
        try:
            ax = fig.add_subplot(111)
            ax.set_title(title)
            draw(ax)

            if showPlot:
                plt.show(block=True)

            filePath = 'graphs/' + fileStem + '_' + country + '_' + self._today()
            os.makedirs('graphs', exist_ok=True)
            fig.savefig(filePath)
        finally:
            # Always release this exact figure, also when plotting or saving fails.
            plt.close(fig)
        return filePath

    def _timeSeriesGraph(self, country, column, titleFormat, fileStem, start_date, end_date,
                         plotDpi, showPlot, derive=None) -> str:
        """
        Plot ``column`` and its 7-day rolling mean over time for one country.

        :param country: The country to plot
        :param column: Name of the column to plot on the y axis
        :param titleFormat: Title with a ``{}`` placeholder for the country name
        :param fileStem: First part of the file name
        :param start_date: First date (inclusive) shown in the graph
        :param end_date: Last date (inclusive) shown in the graph, ``None`` means today
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :param derive: Optional callable computing ``column`` from the country rows
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        if country not in self.getAvailableCountries():
            print('Unknown country')
            return '-1'
        if end_date is None:
            end_date = self._today()

        def draw(ax):
            data = self._countryData(country)
            if derive is not None:
                data[column] = derive(data)
            # The mean is taken over the full history first so that the first
            # days of the requested window already have a value.
            data['7-Day-Mean'] = data[column].rolling(7).mean()
            inRange = (data['dateRep'] >= start_date) & (data['dateRep'] <= end_date)
            window = data.loc[inRange]
            window.plot(ax=ax, x='dateRep', y=column)
            window.plot(ax=ax, x='dateRep', y='7-Day-Mean')

        return self._renderGraph(titleFormat.format(country), fileStem, country,
                                 plotDpi, showPlot, draw)

    def _perMillionGraph(self, country, totalColumn, perMillionColumn, title, fileStem,
                         plotDpi, showPlot) -> str:
        """
        Draw a bar chart of the 20 largest per-million values on the latest date.

        The row of ``country`` is added when it is not among the 20 and its bar
        is coloured red.

        :param country: The country to highlight
        :param totalColumn: Column holding the running total (``totalCases`` / ``totalDeaths``)
        :param perMillionColumn: Name of the computed per-million column
        :param title: Title placed on the axes
        :param fileStem: First part of the file name
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph
        """
        def draw(ax):
            latest = self.df['dateRep'].max()
            snapshot = self.df[self.df['dateRep'] == latest].sort_values('countriesAndTerritories')
            snapshot[perMillionColumn] = (snapshot[totalColumn] / snapshot['popData2019']) * 1000000
            ranking = snapshot.nlargest(20, perMillionColumn)

            ownRows = snapshot[snapshot['countriesAndTerritories'] == country]
            if not ownRows.empty and country not in ranking['countriesAndTerritories'].values:
                # DataFrame.append is gone in current pandas; concat is the replacement.
                ranking = pd.concat([ranking, ownRows])

            # Positional index == position of the bar in ax.patches
            ranking = ranking.reset_index(drop=True)
            ranking.plot.bar(ax=ax, x="countriesAndTerritories", y=perMillionColumn)

            # Highlight the selected country
            positions = ranking.index[ranking['countriesAndTerritories'] == country]
            if not positions.empty:
                ax.patches[int(positions[0])].set_facecolor('r')

        return self._renderGraph(title, fileStem, country, plotDpi, showPlot, draw)

    @staticmethod
    def _daysUntilDoubled(data):
        """
        For every row return the days until ``totalCases`` reached twice that row's value.

        ``data`` must be sorted by ``dateRep``. Rows without cases, or whose
        doubled value is never reached, stay NaN.
        """
        totals = data['totalCases']
        dates = data['dateRep']
        days = pd.Series(np.nan, index=data.index, dtype='float64')
        for label in data.index:
            total = totals[label]
            # Only if there are cases in this country already
            if total > 0:
                reached = dates[totals >= int(total) * 2]
                if not reached.empty:
                    days.at[label] = (reached.iloc[0] - dates[label]).days
        return days

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def getAvailableCountries(self):
        """
        Get all country names contained in the data.

        :return: Array of the unique ``countriesAndTerritories`` values in sorted order
        """
        return self.df['countriesAndTerritories'].sort_values().unique()

    def getAvailableDates(self):
        """
        Get all report dates contained in the data.

        :return: Sorted list of the unique dates as strings in the format YYYY-MM-DD
        """
        return sorted(self.df['dateRep'].dropna().dt.strftime('%Y-%m-%d').unique())

    def getCasesGraph(self, country, start_date='2019-12-31', end_date=None,
                      plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph with the absolute number of cases by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. ``None`` (default) means the current date
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        return self._timeSeriesGraph(country, 'totalCases', 'Total cases in {}', 'casesGraph',
                                     start_date, end_date, plotDpi, showPlot)

    def getCaseIncreaseGraph(self, country, start_date='2019-12-31', end_date=None,
                             plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph with the daily increase number of cases for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. ``None`` (default) means the current date
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        return self._timeSeriesGraph(country, 'cases', 'Daily new cases in {}', 'casesIncreaseGraph',
                                     start_date, end_date, plotDpi, showPlot)

    def getTotalCases(self, country, date=None) -> int:
        """
        Get the total cases for the entered country and date
        :param country: The country you want the case number for. Access available countries via getAvailableCountries()
        :param date: The date for which the case number is returned. ``None`` (default) means the current date. Format YYYY-MM-DD
        :return: The case number
        """
        return self._sumUntil(country, 'cases', date)

    def getDeathGraph(self, country, start_date='2019-12-31', end_date=None,
                      plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph with the absolute number of deaths by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. ``None`` (default) means the current date
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        return self._timeSeriesGraph(country, 'totalDeaths', 'Total deaths in {}', 'deathsGraph',
                                     start_date, end_date, plotDpi, showPlot)

    def getDeathIncreaseGraph(self, country, start_date='2019-12-31', end_date=None,
                              plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph with the daily increase number of deaths for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. ``None`` (default) means the current date
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        return self._timeSeriesGraph(country, 'deaths', 'Daily new deaths in {}', 'deathsIncreaseGraph',
                                     start_date, end_date, plotDpi, showPlot)

    def getTotalDeaths(self, country, date=None) -> int:
        """
        Get the total deaths for the entered country and date
        :param country: The country you want the death number for. Access available countries via getAvailableCountries()
        :param date: The date for which the death number is returned. ``None`` (default) means the current date. Format YYYY-MM-DD
        :return: The death number
        """
        return self._sumUntil(country, 'deaths', date)

    def getDailyDeathRateGraph(self, country, start_date='2019-12-31', end_date=None,
                               plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph with the death rate (total deaths / total cases in %) by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. ``None`` (default) means the current date
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        return self._timeSeriesGraph(country, 'deathRate', 'Daily death rate in {} in %',
                                     'dailyDeathRateGraph', start_date, end_date, plotDpi, showPlot)

    def getDeathRate(self, country, date=None) -> float:
        """
        Get the death rate for the entered country and date
        :param country: The country you want the death rate for. Access available countries via getAvailableCountries()
        :param date: The date for which the death rate is returned. ``None`` (default) means the current date. Format YYYY-MM-DD
        :return: Deaths divided by cases in percent, NaN when there are no cases up to that date
        """
        cases = self._sumUntil(country, 'cases', date)
        if cases == 0:
            # The rate is undefined without cases; avoid the division by zero.
            return float('nan')
        return self._sumUntil(country, 'deaths', date) / cases * 100

    def getIsItOverGraph(self, country, plotDpi=200.0, showPlot=False) -> str:
        """
        Get a logarithmic graph that shows easily if the exponential growth has stopped.
        The 7-day mean of new cases is plotted over the total cases, next to
        China and South_Korea as reference curves.
        :param country: The country to be compared. TODO: Change to a list of countries
        :param plotDpi: Resolution of the figure
        :param showPlot: If a plot is to be shown in the console
        :return: The file path for the plot
        """
        def draw(ax):
            ax.set_ylabel('Case Increase')
            ax.set_xlabel('Total Cases')
            # dict.fromkeys keeps the order and drops a duplicate when the
            # requested country is one of the reference countries.
            for name in dict.fromkeys((country, 'China', 'South_Korea')):
                data = self._countryData(name)
                data[name] = data['cases'].rolling(7).mean()
                try:
                    data.plot(ax=ax, x='totalCases', y=name, loglog=True)
                except Exception as err:
                    # Best effort: a curve that cannot be drawn must not
                    # prevent the remaining curves from being plotted.
                    print('Error occured while plotting ' + str(name) + ': ' + str(err))

        return self._renderGraph('Is it going to end soon in ' + country + '?', 'isItOverGraph',
                                 country, plotDpi, showPlot, draw)

    def getIncreasePercentageGraph(self, country, start_date='2019-12-31', end_date=None,
                                   plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph with the daily new cases as percentage of the total cases for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. ``None`` (default) means the current date
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        return self._timeSeriesGraph(
            country, 'increasePercentage', 'Daily Percentage of Case Increase in {}',
            'increasePercentageGraph', start_date, end_date, plotDpi, showPlot,
            derive=lambda data: data['cases'] / data['totalCases'] * 100)

    def getCasesPerMillionGraph(self, country, plotDpi=200.0, showPlot=False) -> str:
        """
        Get a bar chart of the total cases per million citizens on the latest date,
        showing the 20 largest values plus the entered country (highlighted in red)
        :param country: The country to highlight
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph
        """
        return self._perMillionGraph(
            country, 'totalCases', 'casesPerMillion',
            'Cases per Million Citizens in ' + country + ' compared to top 20',
            'casesPerMillionGraph', plotDpi, showPlot)

    def getDeathsPerMillionGraph(self, country, plotDpi=200.0, showPlot=False) -> str:
        """
        Get a bar chart of the total deaths per million citizens on the latest date,
        showing the 20 largest values plus the entered country (highlighted in red)
        :param country: The country to highlight
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph
        """
        return self._perMillionGraph(
            country, 'totalDeaths', 'deathsPerMillion',
            'Deaths per Million Citizens in ' + country + ' compared to top 20',
            'deathsPerMillionGraph', plotDpi, showPlot)

    def getDoubleRateCompareGraph(self, country, plotDpi=200.0, showPlot=False) -> str:
        """
        Get a graph comparing the forecast doubling time (total cases / new cases of the day)
        with the number of days it really took until the total cases had doubled
        :param country: The country you wish to get the graph for
        :param plotDpi: Resolution of the figure
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph or '-1' for an unknown country
        """
        if country not in self.getAvailableCountries():
            print('Unknown country')
            return '-1'

        def draw(ax):
            data = self._countryData(country).reset_index(drop=True)
            data['doubleRateForecast'] = data['totalCases'] / data['cases']
            data['doubleRateReality'] = self._daysUntilDoubled(data)
            data.plot(ax=ax, x='dateRep', y='doubleRateForecast')
            data.plot(ax=ax, x='dateRep', y='doubleRateReality')

        return self._renderGraph('Double Rate Forecast vs Reality in ' + country,
                                 'doubleRateCompareGraph', country, plotDpi, showPlot, draw)