# -*- coding: utf-8 -*-
"""
Project: Analyse worldwide COVID-19 Data and provide graphs etc.

@author Patrick Müller

Fields in csv:
dateRep
day
month
year
cases
deaths
countriesAndTerritories
geoId
countryterritoryCode
popData2018
"""
from datetime import datetime, timedelta
import tkinter as tk

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def _today():
    """Return the current date as a 'YYYY-MM-DD' string, computed at call time."""
    return datetime.now().strftime('%Y-%m-%d')


class _LazyStatsFrame:
    """
    Non-data descriptor that reads the statistics CSV on first access.

    The parsed frame is cached, so every later access (through the class or
    through an instance) returns the same DataFrame object. Because only
    ``__get__`` is defined, assigning ``instance.df = frame`` still overrides
    the lazily loaded data for that instance.
    """

    def __init__(self, path):
        self._path = path
        self._frame = None

    def __get__(self, instance, owner=None):
        if self._frame is None:
            frame = pd.read_csv(self._path)
            # NOTE(review): no explicit format is given; if dateRep is
            # day-first (e.g. DD/MM/YYYY) ambiguous dates may be misparsed.
            # Confirm against the layout of statsfile.csv.
            frame['dateRep'] = pd.to_datetime(frame['dateRep'])
            self._frame = frame
        return self._frame


class Analyser:
    """Query and plot per-country case numbers taken from 'statsfile.csv'."""

    # Pandas Settings (full option names; abbreviated patterns are resolved
    # by matching and can become ambiguous).
    pd.set_option('display.max_rows', 50)
    pd.set_option('display.max_columns', 10)

    # Loaded from disk on first access instead of at import time.
    df = _LazyStatsFrame('statsfile.csv')

    def _country_rows(self, country):
        """Return the rows of ``self.df`` that belong to ``country``."""
        return self.df[self.df['countriesAndTerritories'] == country]

    def getAvailableCountries(self):
        """
        Get all countries and territories present in the data
        :return: Array with the unique values of 'countriesAndTerritories'
        """
        return self.df['countriesAndTerritories'].unique()

    def getCasesGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
        """
        Get a graph with the absolute (cumulative) number of cases by day for the entered country
        :param country: The country you wish to get the graph for
        :param start_date: First day to include (inclusive). Format YYYY-MM-DD
        :param end_date: Last day to include (inclusive). Format YYYY-MM-DD.
            None (the default) means the date at the time of the call.
        :return: The path for the picture of the graph (currently the placeholder 'abc')
        """
        if country not in self.getAvailableCountries():
            print('Unknown country')
            # TODO Change return
            return 'abc'

        # Resolve "today" per call; a default evaluated in the signature
        # would be frozen at the moment the class was defined.
        if end_date is None:
            end_date = _today()

        fig = plt.figure()
        ax = fig.add_subplot(111)

        rows = self._country_rows(country)
        in_range = (rows['dateRep'] >= start_date) & (rows['dateRep'] <= end_date)
        # Build a new frame via assign() rather than writing into a filtered
        # slice of self.df.
        cumulative = (
            rows.loc[in_range]
            .sort_values('dateRep')
            .assign(cases=lambda frame: frame['cases'].cumsum())
        )
        print(cumulative.head())
        cumulative.plot(ax=ax, x='dateRep', y='cases')
        plt.show(block=True)
        # TODO Change return
        return 'abc'

    def getCaseIncreaseGraph(self, country):
        """
        Get a graph with the daily increase number of cases for the entered country
        :param country: The country you wish to get the graph for
        :return: The path for the picture of the graph (currently the placeholder 'abc')
        """
        if country not in self.getAvailableCountries():
            print('Unknown country')
            # TODO Change return
            return 'abc'

        fig = plt.figure()
        ax = fig.add_subplot(111)

        rows = self._country_rows(country)
        up_to_today = rows['dateRep'] <= _today()
        # Sort chronologically so the line is drawn in date order regardless
        # of the row order in the CSV.
        daily = rows.loc[up_to_today].sort_values('dateRep')
        daily.plot(ax=ax, x='dateRep', y='cases')
        plt.show(block=True)
        # TODO Change return
        return 'abc'

    def getTotalCases(self, country, date=None) -> int:
        """
        Get the total cases for the entered country up to and including a date
        :param country: The country you want the case number for. Access available countries via
            getAvailableCountries()
        :param date: The date for which the case number is returned. Format YYYY-MM-DD.
            None (the default) means the date at the time of the call.
        :return: The case number (0 if no row matches)
        """
        if date is None:
            date = _today()
        rows = self._country_rows(country)
        return rows.loc[rows['dateRep'] <= date, 'cases'].sum()