2020-03-28 11:37:54 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
|
|
Project: Analyse worldwide COVID-19 Data and provide graphs etc.
|
|
|
|
|
|
|
|
@author Patrick Müller
|
|
|
|
"""
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
import tkinter as tk
|
|
|
|
|
|
|
|
"""
|
|
|
|
Fields in csv:
|
|
|
|
dateRep
|
|
|
|
day
|
|
|
|
month
|
|
|
|
year
|
|
|
|
cases
|
|
|
|
deaths
|
|
|
|
countriesAndTerritories
|
|
|
|
geoId
|
|
|
|
countryterritoryCode
|
|
|
|
popData2018
|
|
|
|
"""
|
2020-03-28 15:55:34 +00:00
|
|
|
from datetime import datetime, timedelta
|
2020-03-28 11:37:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Analyser:
    """
    Load the COVID-19 statistics CSV and provide per-country numbers and graphs.

    The loaded data is kept in ``self.df``; its ``dateRep`` column is parsed to
    datetimes so that it can be compared against ``YYYY-MM-DD`` date strings.
    """

    def __init__(self, csv_path='statsfile.csv'):
        """
        Read the statistics file and parse its report dates.

        :param csv_path: Path of the CSV file to load. Defaults to 'statsfile.csv'
        """
        # Pandas Settings (these are process-wide display options)
        pd.set_option('display.max_rows', 50)
        pd.set_option('display.max_columns', 10)

        self.df = pd.read_csv(csv_path)
        self.df['dateRep'] = pd.to_datetime(self.df['dateRep'], format='%d/%m/%Y')

    def getAvailableCountries(self):
        """
        Get every distinct value of the countriesAndTerritories column
        :return: Array of the distinct country names, in order of first appearance
        """
        return self.df['countriesAndTerritories'].unique()

    def getAvailableDates(self):
        """
        Get every distinct report date contained in the data
        :return: Sorted list of date strings in the format YYYY-MM-DD
        """
        # Only the first 10 characters of the string form are kept (YYYY-MM-DD)
        return sorted(str(date)[:10] for date in self.df['dateRep'].unique())

    @staticmethod
    def _today() -> str:
        """Return the current local date as a YYYY-MM-DD string."""
        return datetime.now().strftime('%Y-%m-%d')

    def _selectCountryData(self, country, start_date, end_date):
        """Return the rows of one country within [start_date, end_date], sorted by date."""
        if end_date is None:
            # Resolved per call; a default computed in the signature would be
            # frozen at the moment the class is defined.
            end_date = self._today()
        countryData = self.df[self.df['countriesAndTerritories'] == country]
        # Both bounds are inclusive so the row of start_date itself is kept
        mask = (countryData['dateRep'] >= start_date) & (countryData['dateRep'] <= end_date)
        return countryData.loc[mask].sort_values('dateRep')

    def _plotGraph(self, country, column, cumulative, title, filePrefix, start_date, end_date) -> str:
        """Plot one column of a country's data over time, save the figure and return its path."""
        if country not in self.getAvailableCountries():
            print('Unknown country')
            return '-1'

        # Local import so this class does not rely on a file-level `import os`
        import os

        # Work on an explicit copy so the cumulative sum never writes into self.df
        plotData = self._selectCountryData(country, start_date, end_date)[['dateRep', column]].copy()
        if cumulative:
            plotData[column] = plotData[column].cumsum()

        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title(title + country)
        plotData.plot(ax=ax, x='dateRep', y=column)

        filePath = 'graphs/' + filePrefix + '_' + country + '_' + self._today()
        # savefig does not create missing directories
        os.makedirs('graphs', exist_ok=True)
        # Save before showing: plt.show(block=True) only returns once the window
        # has been closed, and the file should not depend on that.
        fig.savefig(filePath)

        plt.show(block=True)
        # Release the figure; pyplot keeps every figure alive until it is closed
        plt.close(fig)

        return filePath

    def getCasesGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the absolute (cumulative) number of cases by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph (inclusive). Format YYYY-MM-DD
        :param end_date: The end date of the graph (inclusive). Standard is the current date. Format YYYY-MM-DD
        :return: The path handed to savefig for the picture of the graph (it has no file
                 extension, matplotlib adds the one of its default format), or '-1' if the
                 country is unknown
        """
        return self._plotGraph(country, 'cases', True, 'Total cases graph for ',
                               'casesGraph', start_date, end_date)

    def getCaseIncreaseGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the daily increase number of cases for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph (inclusive). Format YYYY-MM-DD
        :param end_date: The end date of the graph (inclusive). Standard is the current date. Format YYYY-MM-DD
        :return: The path handed to savefig for the picture of the graph (it has no file
                 extension, matplotlib adds the one of its default format), or '-1' if the
                 country is unknown
        """
        return self._plotGraph(country, 'cases', False, 'Daily case increase graph for ',
                               'casesIncreaseGraph', start_date, end_date)

    def getTotalCases(self, country, date=None) -> int:
        """
        Get the total cases for the entered country up to and including a date
        :param country: The country you want the case number for. Access available countries via getAvailableCountries()
        :param date: The date for which the case number is returned. Standard is the current date. Format YYYY-MM-DD
        :return: The case number (0 if the country has no rows up to that date)
        """
        if date is None:
            date = self._today()
        countryData = self.df[self.df['countriesAndTerritories'] == country]
        # Cast so the result is a plain int as annotated, not a numpy scalar
        return int(countryData.loc[countryData['dateRep'] <= date, 'cases'].sum())

    def getDeathGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the absolute (cumulative) number of deaths by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph (inclusive). Format YYYY-MM-DD
        :param end_date: The end date of the graph (inclusive). Standard is the current date. Format YYYY-MM-DD
        :return: The path handed to savefig for the picture of the graph (it has no file
                 extension, matplotlib adds the one of its default format), or '-1' if the
                 country is unknown
        """
        # Own file prefix so this graph does not overwrite the cases graph
        return self._plotGraph(country, 'deaths', True, 'Total deaths graph for ',
                               'deathGraph', start_date, end_date)

    def getDeathIncreaseGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the daily number of deaths for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph (inclusive). Format YYYY-MM-DD
        :param end_date: The end date of the graph (inclusive). Standard is the current date. Format YYYY-MM-DD
        :return: The path handed to savefig for the picture of the graph (it has no file
                 extension, matplotlib adds the one of its default format), or '-1' if the
                 country is unknown
        """
        # Own file prefix so this graph does not overwrite the case increase graph
        return self._plotGraph(country, 'deaths', False, 'Daily deaths graph for ',
                               'deathIncreaseGraph', start_date, end_date)
|