covid-19-analysis/Analyser.py

207 lines
7.2 KiB
Python
Raw Normal View History

2020-03-28 11:37:54 +00:00
# -*- coding: utf-8 -*-
"""
Project: Analyse worldwide COVID-19 Data and provide graphs etc.
@author Patrick Müller
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tkinter as tk
"""
Fields in csv:
dateRep
day
month
year
cases
deaths
countriesAndTerritories
geoId
countryterritoryCode
popData2018
"""
2020-03-28 15:55:34 +00:00
from datetime import datetime, timedelta
2020-03-28 11:37:54 +00:00
class Analyser:
    """
    Load the COVID-19 statistics csv and provide lookups and graphs per country.

    The data is kept in ``self.df``. The methods of this class read the columns
    ``dateRep``, ``cases``, ``deaths`` and ``countriesAndTerritories``.
    """

    def __init__(self, csv_path='statsfile.csv'):
        """
        Read the statistics file and parse its report dates
        :param csv_path: Path of the csv file to load. Standard is 'statsfile.csv'
        """
        # Pandas Settings (process-wide display options, not per instance)
        pd.set_option('display.max_rows', 50)
        pd.set_option('display.max_columns', 10)
        self.df = pd.read_csv(csv_path)
        self.df['dateRep'] = pd.to_datetime(self.df['dateRep'], format='%d/%m/%Y')

    def getAvailableCountries(self):
        """
        Get all countries contained in the data
        :return: Array with the unique country names in ascending order
        """
        # Only the country column is sorted, not the whole data frame
        return self.df['countriesAndTerritories'].sort_values().unique()

    def getAvailableDates(self):
        """
        Get all report dates contained in the data
        :return: Sorted list with the unique dates as strings in the format YYYY-MM-DD
        """
        # The first 10 characters of the string form are the YYYY-MM-DD part
        return sorted(str(date)[:10] for date in self.df['dateRep'].unique())

    @staticmethod
    def _resolveEndDate(date):
        """Return the given date, or the current day (YYYY-MM-DD) if it is None."""
        # Resolved on every call: a default computed in the signature would be
        # frozen at import time and go stale in a long-running process
        if date is None:
            return datetime.now().strftime('%Y-%m-%d')
        return date

    def _isKnownCountry(self, country) -> bool:
        """Return True if at least one row of the data belongs to the country."""
        return bool((self.df['countriesAndTerritories'] == country).any())

    def _getCountryData(self, country, start_date, end_date):
        """Return the rows of the country within [start_date, end_date], sorted by dateRep.

        A start_date of None means no lower bound, an end_date of None means today.
        """
        countryData = self.df[self.df['countriesAndTerritories'].isin([country])]
        mask = countryData['dateRep'] <= self._resolveEndDate(end_date)
        if start_date is not None:
            mask &= countryData['dateRep'] >= start_date
        return countryData.loc[mask].sort_values('dateRep')

    def _createGraph(self, country, start_date, end_date, title, filePrefix, yColumn, derive=None) -> str:
        """Plot yColumn over dateRep for the country, save the picture and show it.

        derive is an optional function that receives the date-sorted rows and returns
        a frame containing yColumn. Returns the save path, or '-1' for an unknown country.
        """
        if not self._isKnownCountry(country):
            print('Unknown country')
            return '-1'
        countryTimeData = self._getCountryData(country, start_date, end_date)
        if derive is not None:
            countryTimeData = derive(countryTimeData)
        # Function-scope import so this class does not rely on a module-level 'import os'
        import os
        # savefig does not create missing directories
        os.makedirs('graphs', exist_ok=True)
        # NOTE: the path has no file extension. According to the matplotlib savefig
        # documentation the default format's extension (png unless configured otherwise)
        # is appended to the file on disk; the returned string is kept without it so
        # existing callers see the same value as before.
        filePath = 'graphs/' + filePrefix + '_' + country + '_' + datetime.now().strftime('%Y-%m-%d')
        fig = plt.figure()
        try:
            ax = fig.add_subplot(111)
            ax.set_title(title)
            countryTimeData.plot(ax=ax, x='dateRep', y=yColumn)
            # Save before the blocking window is shown, so the picture is on disk
            # no matter what happens to the figure while the window is open
            fig.savefig(filePath)
            plt.show(block=True)
        finally:
            # Release the figure, otherwise every call leaves one more registered in pyplot
            plt.close(fig)
        return filePath

    def getCasesGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the absolute (cumulative) number of cases by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current date
        :return: The path for the picture of the graph, '-1' if the country is unknown
        """
        return self._createGraph(
            country, start_date, end_date,
            title='Total cases in ' + country,
            filePrefix='casesGraph',
            yColumn='totalCases',
            derive=lambda data: data.assign(totalCases=data['cases'].cumsum()),
        )

    def getCaseIncreaseGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the daily number of new cases for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current date
        :return: The path for the picture of the graph, '-1' if the country is unknown
        """
        return self._createGraph(
            country, start_date, end_date,
            title='Daily new cases in ' + country,
            filePrefix='casesIncreaseGraph',
            yColumn='cases',
        )

    def getTotalCases(self, country, date=None) -> int:
        """
        Get the total cases up to a date for the entered country
        :param country: The country you want the case number for. Access available countries via getAvailableCountries()
        :param date: The date for which the case number is returned. Standard is the current date. Format YYYY-MM-DD
        :return: The case number (0 if there is no data for the country)
        """
        countryTimeData = self._getCountryData(country, None, date)
        # Convert the numpy scalar so the result matches the annotated return type
        return int(countryTimeData['cases'].sum())

    def getDeathGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the absolute (cumulative) number of deaths by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current date
        :return: The path for the picture of the graph, '-1' if the country is unknown
        """
        return self._createGraph(
            country, start_date, end_date,
            title='Total deaths in ' + country,
            filePrefix='deathsGraph',
            yColumn='totalDeaths',
            derive=lambda data: data.assign(totalDeaths=data['deaths'].cumsum()),
        )

    def getDeathIncreaseGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the daily number of new deaths for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current date
        :return: The path for the picture of the graph, '-1' if the country is unknown
        """
        return self._createGraph(
            country, start_date, end_date,
            title='Daily new deaths in ' + country,
            filePrefix='deathsIncreaseGraph',
            yColumn='deaths',
        )

    def getDailyDeathRateGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the death rate by day for the entered country.
        The rate of a day is the cumulative deaths divided by the cumulative cases
        within the selected date range, in percent
        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current date
        :return: The path for the picture of the graph, '-1' if the country is unknown
        """
        def addDeathRate(data):
            totalCases = data['cases'].cumsum()
            totalDeaths = data['deaths'].cumsum()
            return data.assign(
                totalCases=totalCases,
                totalDeaths=totalDeaths,
                deathRate=totalDeaths / totalCases * 100,
            )

        return self._createGraph(
            country, start_date, end_date,
            title=('Daily death rate in ' + country) + ' in %',
            filePrefix='dailyDeathRateGraph',
            yColumn='deathRate',
            derive=addDeathRate,
        )