# -*- coding: utf-8 -*-
"""
Project: Analyse worldwide COVID-19 Data and provide graphs etc.

Fields in csv:
    dateRep
    day
    month
    year
    cases
    deaths
    countriesAndTerritories
    geoId
    countryterritoryCode
    popData2018

@author Patrick Müller
"""

import os
import random
import tkinter as tk
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Directory (relative to the working directory) that receives the graphs.
_GRAPH_DIR = 'graphs'
# Date format used for date arguments and for the date suffix of file names.
_DATE_FORMAT = '%Y-%m-%d'


def _today() -> str:
    """Return the current local date formatted as YYYY-MM-DD."""
    return datetime.now().strftime(_DATE_FORMAT)


class Analyser:
    """Load the COVID-19 statistics csv and produce numbers and graphs."""

    def __init__(self, csvPath='statsfile.csv'):
        """
        Read the statistics file and add the running totals per country

        :param csvPath: Path of the csv file to load
        """
        # Pandas Settings
        pd.set_option('display.max_rows', 50)
        pd.set_option('display.max_columns', 10)

        self.df = pd.read_csv(csvPath)
        self.df['dateRep'] = pd.to_datetime(self.df['dateRep'],
                                            format='%d/%m/%Y')
        self._addRunningTotals()
        print('DEBUG: Analyser initialized')

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _addRunningTotals(self):
        """Add the columns totalCases, totalDeaths and deathRate to self.df."""
        # The cumulative sums have to run in date order within each country.
        # The results keep the original row labels, so assigning them back
        # aligns every value with the row it belongs to.
        grouped = self.df.sort_values('dateRep').groupby(
            'countriesAndTerritories')
        self.df['totalCases'] = grouped['cases'].cumsum()
        self.df['totalDeaths'] = grouped['deaths'].cumsum()
        # Rows without a defined value (e.g. 0 deaths / 0 cases) are set to 0.
        self.df['totalCases'] = self.df['totalCases'].fillna(0)
        self.df['totalDeaths'] = self.df['totalDeaths'].fillna(0)
        rate = self.df['totalDeaths'] / self.df['totalCases'] * 100
        self.df['deathRate'] = rate.fillna(0)

    def _countryData(self, country):
        """Return the rows of one country as a new frame sorted by date."""
        rows = self.df[self.df['countriesAndTerritories'].isin([country])]
        return rows.sort_values('dateRep')

    def _totalUntil(self, country, column, date):
        """Sum `column` over all rows of `country` dated up to `date`."""
        rows = self.df[self.df['countriesAndTerritories'].isin([country])]
        return rows.loc[rows['dateRep'] <= date, column].sum()

    @staticmethod
    def _newFigure(title):
        """Create a 200 dpi figure with a single, titled subplot."""
        fig = plt.figure()
        fig.dpi = 200.0
        ax = fig.add_subplot(111)
        ax.set_title(title)
        return fig, ax

    @staticmethod
    def _saveFigure(fig, filePrefix, country, showPlot):
        """Optionally show the figure, save it and return the path used."""
        if showPlot:
            plt.show(block=True)
        filePath = (_GRAPH_DIR + '/' + filePrefix + '_' + country + '_'
                    + _today())
        # savefig does not create missing directories itself.
        os.makedirs(_GRAPH_DIR, exist_ok=True)
        # NOTE(review): the path carries no extension, so matplotlib picks
        # the format when saving -- confirm what callers expect to open.
        fig.savefig(filePath)
        return filePath

    def _renderTimeSeries(self, countryData, column, title, filePrefix,
                          country, start_date, end_date, showPlot):
        """Plot `column` and its 7 day rolling mean between two dates."""
        if end_date is None:
            end_date = _today()
        # The mean is computed before the date filter so that the first
        # days of the selected window already have a full 7 day history.
        countryData = countryData.assign(
            **{'7-Day-Mean': countryData[column].rolling(7).mean()})
        mask = ((countryData['dateRep'] >= start_date)
                & (countryData['dateRep'] <= end_date))
        countryTimeData = countryData.loc[mask]

        fig, ax = self._newFigure(title)
        try:
            countryTimeData.plot(ax=ax, x='dateRep', y=column)
            countryTimeData.plot(ax=ax, x='dateRep', y='7-Day-Mean')
            return self._saveFigure(fig, filePrefix, country, showPlot)
        finally:
            # Close the figure even if plotting or saving failed.
            plt.close(fig)

    def _timeSeriesGraph(self, country, column, title, filePrefix,
                         start_date, end_date, showPlot):
        """Validate the country, then plot one of its columns over time."""
        if country not in self.getAvailableCountries():
            print('Unknown country')
            return '-1'
        return self._renderTimeSeries(self._countryData(country), column,
                                      title, filePrefix, country,
                                      start_date, end_date, showPlot)

    def _perMillionGraph(self, country, totalColumn, perMillionColumn,
                         title, filePrefix, showPlot):
        """Bar chart of the 20 largest per-million values plus `country`."""
        latestDate = self.getAvailableDates()[-1]
        timeData = self.df.loc[self.df['dateRep'] == latestDate]
        timeData = timeData.sort_values('countriesAndTerritories')
        timeData = timeData.assign(**{
            perMillionColumn:
                (timeData[totalColumn] / timeData['popData2018']) * 1000000})

        largestData = timeData.nlargest(20, perMillionColumn)
        if country not in largestData['countriesAndTerritories'].unique():
            # Always show the selected country, even outside the top 20.
            largestData = pd.concat([
                largestData,
                timeData.loc[timeData['countriesAndTerritories'] == country]])

        fig, ax = self._newFigure(title)
        try:
            largestData.plot.bar(ax=ax, x='countriesAndTerritories',
                                 y=perMillionColumn)
            # Highlight the selected country. The bars are drawn in row
            # order, so the row position is also the index of the bar.
            names = largestData['countriesAndTerritories'].tolist()
            if country in names:
                ax.patches[names.index(country)].set_facecolor('r')
            return self._saveFigure(fig, filePrefix, country, showPlot)
        finally:
            plt.close(fig)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def getAvailableCountries(self):
        """
        Get all countries contained in the data

        :return: The unique country names, taken from the data sorted by name
        """
        ordered = self.df.sort_values('countriesAndTerritories')
        return ordered['countriesAndTerritories'].unique()

    def getAvailableDates(self):
        """
        Get all dates contained in the data

        :return: Sorted list of the unique dates as strings (YYYY-MM-DD)
        """
        # To only get the substring in the format YYYY-MM-DD
        return sorted(str(date)[:10] for date in self.df['dateRep'].unique())

    def getCasesGraph(self, country, start_date='2019-12-31', end_date=None,
                      showPlot=False) -> str:
        """
        Get a graph with the absolute number of cases by day for the entered
        country

        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current
            date, determined when the method is called
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph, '-1' if the country
            is unknown
        """
        return self._timeSeriesGraph(country, 'totalCases',
                                     'Total cases in ' + country,
                                     'casesGraph',
                                     start_date, end_date, showPlot)

    def getCaseIncreaseGraph(self, country, start_date='2019-12-31',
                             end_date=None, showPlot=False) -> str:
        """
        Get a graph with the daily increase number of cases for the entered
        country

        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current
            date, determined when the method is called
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph, '-1' if the country
            is unknown
        """
        return self._timeSeriesGraph(country, 'cases',
                                     'Daily new cases in ' + country,
                                     'casesIncreaseGraph',
                                     start_date, end_date, showPlot)

    def getTotalCases(self, country, date=None) -> int:
        """
        Get the total cases for the entered country and date

        :param country: The country you want the case number for. Access
            available countries via getAvailableCountries()
        :param date: The date for which the case number is returned. Standard
            is the current date. Format YYYY-MM-DD
        :return: The case number
        """
        if date is None:
            date = _today()
        return self._totalUntil(country, 'cases', date)

    def getDeathGraph(self, country, start_date='2019-12-31', end_date=None,
                      showPlot=False) -> str:
        """
        Get a graph with the absolute number of deaths by day for the entered
        country

        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current
            date, determined when the method is called
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph, '-1' if the country
            is unknown
        """
        return self._timeSeriesGraph(country, 'totalDeaths',
                                     'Total deaths in ' + country,
                                     'deathsGraph',
                                     start_date, end_date, showPlot)

    def getDeathIncreaseGraph(self, country, start_date='2019-12-31',
                              end_date=None, showPlot=False) -> str:
        """
        Get a graph with the daily increase number of deaths for the entered
        country

        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current
            date, determined when the method is called
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph, '-1' if the country
            is unknown
        """
        return self._timeSeriesGraph(country, 'deaths',
                                     'Daily new deaths in ' + country,
                                     'deathsIncreaseGraph',
                                     start_date, end_date, showPlot)

    def getTotalDeaths(self, country, date=None) -> int:
        """
        Get the total deaths for the entered country and date

        :param country: The country you want the death number for. Access
            available countries via getAvailableCountries()
        :param date: The date for which the death number is returned.
            Standard is the current date. Format YYYY-MM-DD
        :return: The death number
        """
        if date is None:
            date = _today()
        return self._totalUntil(country, 'deaths', date)

    def getDailyDeathRateGraph(self, country, start_date='2019-12-31',
                               end_date=None, showPlot=False) -> str:
        """
        Get a graph with the death rate (total deaths per total cases in %)
        by day for the entered country

        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current
            date, determined when the method is called
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph, '-1' if the country
            is unknown
        """
        return self._timeSeriesGraph(country, 'deathRate',
                                     ('Daily death rate in ' + country)
                                     + ' in %',
                                     'dailyDeathRateGraph',
                                     start_date, end_date, showPlot)

    def getDeathRate(self, country, date=None) -> float:
        """
        Get the death rate for the entered country and date

        :param country: The country you want the death rate for. Access
            available countries via getAvailableCountries()
        :param date: The date for which the death rate is returned. Standard
            is the current date. Format YYYY-MM-DD
        :return: Total deaths per total cases in percent, 0.0 if there are no
            cases up to the date
        """
        if date is None:
            date = _today()
        totalCases = self._totalUntil(country, 'cases', date)
        if totalCases == 0:
            # No cases: the rate is undefined, report 0 instead of NaN.
            return 0.0
        return self._totalUntil(country, 'deaths', date) / totalCases * 100

    def getIsItOverGraph(self, country, showPlot=False,
                         referenceCountries=('China', 'South_Korea')) -> str:
        """
        Get a logarithmic graph that shows easily if the exponential growth
        has stopped.

        :param country: The country to be compared
        :param showPlot: If a plot is to be shown in the console
        :param referenceCountries: The countries the entered country is
            compared to
        :return: The file path for the plot
        """
        fig, ax = self._newFigure(
            'Is it going to end soon in ' + country + '?')
        try:
            ax.set_ylabel('Case Increase')
            ax.set_xlabel('Total Cases')
            # dict.fromkeys drops duplicates but keeps the order, so the
            # entered country is not drawn twice if it is also a reference.
            names = list(dict.fromkeys([country] + list(referenceCountries)))
            for name in names:
                countryTimeData = self._countryData(name)
                countryTimeData[name] = (
                    countryTimeData['cases'].rolling(7).mean())
                try:
                    countryTimeData.plot(ax=ax, x='totalCases', y=name,
                                         loglog=True)
                except Exception:
                    # Best effort: a country that cannot be plotted must not
                    # prevent the graph for the other countries.
                    print('Error occured')
            return self._saveFigure(fig, 'isItOverGraph', country, showPlot)
        finally:
            plt.close(fig)

    def getIncreasePercentageGraph(self, country, start_date='2019-12-31',
                                   end_date=None, showPlot=False) -> str:
        """
        Get a graph with the daily new cases in percent of the total cases
        for the entered country

        :param country: The country you wish to get the graph for
        :param start_date: The start date of the graph
        :param end_date: The end date of the graph. Standard is the current
            date, determined when the method is called
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph
        """
        countryData = self._countryData(country)
        countryData['increasePercentage'] = (
            countryData['cases'] / countryData['totalCases'] * 100)
        return self._renderTimeSeries(
            countryData, 'increasePercentage',
            'Daily Percentage of Case Increase in ' + country,
            'increasePercentageGraph', country,
            start_date, end_date, showPlot)

    def getCasesPerMillionGraph(self, country, showPlot=False) -> str:
        """
        Get a bar chart with the total cases per million citizens on the
        latest available date for the 20 countries with the highest value.
        The entered country is added if needed and highlighted in red.

        :param country: The country to highlight
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph
        """
        return self._perMillionGraph(
            country, 'totalCases', 'casesPerMillion',
            'Cases per Million Citizens in ' + country
            + ' compared to top 20',
            'casesPerMillionGraph', showPlot)

    def getDeathsPerMillionGraph(self, country, showPlot=False) -> str:
        """
        Get a bar chart with the total deaths per million citizens on the
        latest available date for the 20 countries with the highest value.
        The entered country is added if needed and highlighted in red.

        :param country: The country to highlight
        :param showPlot: Whether to show the plot or only return the file path
        :return: The path for the picture of the graph
        """
        return self._perMillionGraph(
            country, 'totalDeaths', 'deathsPerMillion',
            'Deaths per Million Citizens in ' + country
            + ' compared to top 20',
            'deathsPerMillionGraph', showPlot)