- Also refactored the analyser to have better performance and easier access to required values
		
			
				
	
	
		
			202 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			202 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# -*- coding: utf-8 -*-
 | 
						|
"""
 | 
						|
Project: Analyse worldwide COVID-19 Data and provide graphs etc.
 | 
						|
 | 
						|
@author Patrick Müller
 | 
						|
"""
 | 
						|
import numpy as np
 | 
						|
import pandas as pd
 | 
						|
import matplotlib.pyplot as plt
 | 
						|
import tkinter as tk
 | 
						|
 | 
						|
"""
 | 
						|
Fields in csv:
 | 
						|
dateRep
 | 
						|
day
 | 
						|
month
 | 
						|
year
 | 
						|
cases
 | 
						|
deaths
 | 
						|
countriesAndTerritories
 | 
						|
geoId
 | 
						|
countryterritoryCode
 | 
						|
popData2018
 | 
						|
"""
 | 
						|
from datetime import datetime, timedelta
 | 
						|
 | 
						|
 | 
						|
class Analyser:
	"""
	Load the COVID-19 statistics csv and answer per-country queries (totals and graphs).

	The data is kept in ``self.df``. On top of the csv columns it carries three
	derived columns: ``totalCases`` and ``totalDeaths`` (running totals per country)
	and ``deathRate`` (daily deaths / daily cases in percent).
	"""

	# Format of the date strings accepted and returned by the public methods
	_DATE_FORMAT = '%Y-%m-%d'

	def __init__(self, csvPath='statsfile.csv'):
		"""
		Read the csv file and precompute the derived columns.
		:param csvPath: Path of the csv file to analyse. Standard is 'statsfile.csv'
		"""
		# Pandas Settings
		pd.set_option('display.max_rows', 50)
		pd.set_option('display.max_columns', 10)

		self.df = pd.read_csv(csvPath)
		self.df['dateRep'] = pd.to_datetime(self.df['dateRep'], format='%d/%m/%Y')
		self.df = self.df.sort_values('dateRep')

		# The running totals must be accumulated per country. A plain cumsum over
		# the whole frame would add up the rows of all countries.
		perCountry = self.df.groupby('countriesAndTerritories')
		self.df['totalCases'] = perCountry['cases'].cumsum()
		self.df['totalDeaths'] = perCountry['deaths'].cumsum()
		# Days with zero reported cases yield inf/NaN here (division by zero)
		self.df['deathRate'] = self.df['deaths'] / self.df['cases'] * 100

	def _today(self) -> str:
		"""Return the current date as string in the format YYYY-MM-DD."""
		return datetime.now().strftime(self._DATE_FORMAT)

	def _countryRows(self, country, start_date=None, end_date=None):
		"""
		Select the rows of one country within an inclusive date range.
		:param country: The country to select
		:param start_date: First date to include, or None for no lower bound
		:param end_date: Last date to include, or None for the current date
		:return: The matching part of self.df
		"""
		if end_date is None:
			end_date = self._today()
		countryData = self.df[self.df['countriesAndTerritories'] == country]
		mask = countryData['dateRep'] <= end_date
		if start_date is not None:
			mask = mask & (countryData['dateRep'] >= start_date)
		return countryData.loc[mask]

	def _createGraph(self, country, column, title, filePrefix, start_date, end_date) -> str:
		"""
		Plot one column of a country over time, show the graph and save it below 'graphs/'.
		:param country: The country you wish to get the graph for
		:param column: The column of self.df used for the y axis
		:param title: The title of the graph
		:param filePrefix: First part of the file name of the saved picture
		:param start_date: The start date of the graph
		:param end_date: The end date of the graph, or None for the current date
		:return: The path for the picture of the graph, or '-1' for an unknown country
		"""
		if country not in self.getAvailableCountries():
			print('Unknown country')
			return '-1'

		# Local import: os is only needed here to make sure the target folder exists
		import os

		fig = plt.figure()
		try:
			ax = fig.add_subplot(111)
			ax.set_title(title)
			self._countryRows(country, start_date, end_date).plot(ax=ax, x='dateRep', y=column)

			plt.show(block=True)

			# savefig does not create missing folders
			os.makedirs('graphs', exist_ok=True)
			# NOTE(review): the path has no file extension. matplotlib is documented to
			# append the extension of its default format when saving, so the file on
			# disk presumably ends in '.png' - confirm against the callers.
			filePath = 'graphs/' + filePrefix + '_' + country + '_' + self._today()
			fig.savefig(filePath)
		finally:
			# Release the figure, otherwise every call leaves one more open figure behind
			plt.close(fig)

		return filePath

	def getAvailableCountries(self):
		"""
		Get all countries and territories that occur in the data
		:return: Array with the distinct values of the column countriesAndTerritories
		"""
		return self.df['countriesAndTerritories'].unique()

	def getAvailableDates(self):
		"""
		Get all dates that occur in the data
		:return: Sorted list of the distinct dates as strings in the format YYYY-MM-DD
		"""
		# str(date)[:10] keeps only the substring in the format YYYY-MM-DD
		return sorted(str(date)[:10] for date in self.df['dateRep'].unique())

	def getCasesGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
		"""
		Get a graph with the absolute number of cases by day for the entered country
		:param country: The country you wish to get the graph for
		:param start_date: The start date of the graph
		:param end_date: The end date of the graph. Standard is the current date
		:return: The path for the picture of the graph, or '-1' for an unknown country
		"""
		return self._createGraph(country, 'totalCases', 'Total cases in ' + country,
								 'casesGraph', start_date, end_date)

	def getCaseIncreaseGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
		"""
		Get a graph with the daily number of new cases for the entered country
		:param country: The country you wish to get the graph for
		:param start_date: The start date of the graph
		:param end_date: The end date of the graph. Standard is the current date
		:return: The path for the picture of the graph, or '-1' for an unknown country
		"""
		return self._createGraph(country, 'cases', 'Daily new cases in ' + country,
								 'casesIncreaseGraph', start_date, end_date)

	def getTotalCases(self, country, date=None) -> int:
		"""
		Get the total cases for the entered country up to and including a date
		:param country: The country you want the case number for. Access available countries via getAvailableCountries()
		:param date: The date for which the case number is returned. Standard is the current date. Format YYYY-MM-DD
		:return: The case number
		"""
		return int(self._countryRows(country, end_date=date)['cases'].sum())

	def getDeathGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
		"""
		Get a graph with the absolute number of deaths by day for the entered country
		:param country: The country you wish to get the graph for
		:param start_date: The start date of the graph
		:param end_date: The end date of the graph. Standard is the current date
		:return: The path for the picture of the graph, or '-1' for an unknown country
		"""
		return self._createGraph(country, 'totalDeaths', 'Total deaths in ' + country,
								 'deathsGraph', start_date, end_date)

	def getDeathIncreaseGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
		"""
		Get a graph with the daily number of new deaths for the entered country
		:param country: The country you wish to get the graph for
		:param start_date: The start date of the graph
		:param end_date: The end date of the graph. Standard is the current date
		:return: The path for the picture of the graph, or '-1' for an unknown country
		"""
		return self._createGraph(country, 'deaths', 'Daily new deaths in ' + country,
								 'deathsIncreaseGraph', start_date, end_date)

	def getDailyDeathRateGraph(self, country, start_date='2019-12-31', end_date=None) -> str:
		"""
		Get a graph with the daily death rate (deaths / cases in percent) for the entered country
		:param country: The country you wish to get the graph for
		:param start_date: The start date of the graph
		:param end_date: The end date of the graph. Standard is the current date
		:return: The path for the picture of the graph, or '-1' for an unknown country
		"""
		return self._createGraph(country, 'deathRate', 'Daily death rate in ' + country + ' in %',
								 'dailyDeathRateGraph', start_date, end_date)