Refactored Analyser and added per-Million-Graphs

This commit is contained in:
Patrick Müller 2020-04-10 21:59:58 +02:00
parent cadda056fd
commit f1110d4f07
4 changed files with 1449 additions and 156 deletions

View File

@ -35,6 +35,20 @@ class Analyser:
self.df = pd.read_csv('statsfile.csv')
self.df['dateRep'] = pd.to_datetime(self.df['dateRep'], format='%d/%m/%Y')
# Calculate total Numbers for each country
self.df['totalCases'] = 0
self.df['totalDeaths'] = 0
self.df['deathRate'] = 0
for country in self.df['countriesAndTerritories'].unique():
countryData = self.df[self.df['countriesAndTerritories'].isin([country])]
countryData = countryData.sort_values('dateRep')
countryData['totalCases'] = countryData['cases'].cumsum()
countryData['totalDeaths'] = countryData['deaths'].cumsum()
countryData['deathRate'] = countryData['totalDeaths'] / countryData['totalCases'] * 100
self.df.update(countryData)
print('DEBUG: Analyser initialized')
def getAvailableCountries(self):
    """
    Return the distinct values of the 'countriesAndTerritories' column.

    The frame is ordered by that column first, so the unique values come
    back in sorted order.
    """
    countryColumn = 'countriesAndTerritories'
    orderedFrame = self.df.sort_values(countryColumn)
    return orderedFrame[countryColumn].unique()
@ -65,7 +79,6 @@ class Analyser:
countryData = self.df[self.df['countriesAndTerritories'].isin([country])]
countryData = countryData.sort_values('dateRep')
countryData['totalCases'] = countryData['cases'].cumsum()
countryData['7-Day-Mean'] = countryData['totalCases'].rolling(7).mean()
mask = (countryData['dateRep'] >= start_date) & (countryData['dateRep'] <= end_date)
countryTimeData = countryData.loc[mask]
@ -150,7 +163,6 @@ class Analyser:
countryData = self.df[self.df['countriesAndTerritories'].isin([country])]
countryData = countryData.sort_values('dateRep')
countryData['totalDeaths'] = countryData['deaths'].cumsum()
countryData['7-Day-Mean'] = countryData['totalDeaths'].rolling(7).mean()
mask = (countryData['dateRep'] >= start_date) & (countryData['dateRep'] <= end_date)
countryTimeData = countryData.loc[mask]
@ -235,9 +247,6 @@ class Analyser:
countryData = self.df[self.df['countriesAndTerritories'].isin([country])]
countryData = countryData.sort_values('dateRep')
countryData['totalCases'] = countryData['cases'].cumsum()
countryData['totalDeaths'] = countryData['deaths'].cumsum()
countryData['deathRate'] = countryData['totalDeaths'] / countryData['totalCases'] * 100
countryData['7-Day-Mean'] = countryData['deathRate'].rolling(7).mean()
mask = (countryData['dateRep'] >= start_date) & (countryData['dateRep'] <= end_date)
countryTimeData = countryData.loc[mask]
@ -285,7 +294,6 @@ class Analyser:
for index, country in enumerate([country, 'China', 'South_Korea'], start=1):
countryTimeData = self.df[self.df['countriesAndTerritories'].isin([country])]
countryTimeData = countryTimeData.sort_values('dateRep')
countryTimeData['totalCases'] = countryTimeData['cases'].cumsum()
countryTimeData[country] = countryTimeData['cases'].rolling(7).mean()
try:
@ -310,7 +318,6 @@ class Analyser:
countryData = self.df[self.df['countriesAndTerritories'].isin([country])]
countryData = countryData.sort_values('dateRep')
countryData['totalCases'] = countryData['cases'].cumsum()
countryData['increasePercentage'] = countryData['cases'] / countryData['totalCases'] * 100
countryData['7-Day-Mean'] = countryData['increasePercentage'].rolling(7).mean()
mask = (countryData['dateRep'] >= start_date) & (countryData['dateRep'] <= end_date)
@ -326,3 +333,67 @@ class Analyser:
plt.close()
return filePath
def getCasesPerMillionGraph(self, country, showPlot=False) -> str:
    """
    Draw a bar chart of total cases per million inhabitants and save it.

    The chart shows the 20 countries with the highest value on the last
    date returned by ``getAvailableDates()``. If the selected country is
    not among them, it is added as an extra bar. The bar of the selected
    country is coloured red.

    :param country: value of 'countriesAndTerritories' to highlight
    :param showPlot: if True, show the plot in a blocking window before saving
    :return: the path that was passed to ``fig.savefig``
    """
    fig = plt.figure()
    fig.dpi = 200.0
    ax = fig.add_subplot(111)
    plt.title('Cases per Million Citizens in ' + country + ' compared to top 20')

    # Query the available dates once and take the last entry.
    availableDates = self.getAvailableDates()
    date = availableDates[len(availableDates) - 1]

    timeData = self.df.loc[self.df['dateRep'] == date]
    timeData = timeData.sort_values('countriesAndTerritories')
    timeData['casesPerMillion'] = ((timeData['totalCases'] / timeData['popData2018']) * 1000000)

    largestData = timeData.nlargest(20, 'casesPerMillion')
    if country not in largestData['countriesAndTerritories'].unique():
        # DataFrame.append no longer exists in pandas 2.x; concat is the
        # equivalent way to add the selected country's row(s) at the end.
        countryRows = timeData.loc[timeData['countriesAndTerritories'] == country]
        largestData = pd.concat([largestData, countryRows])

    largestData.plot.bar(ax=ax, x="countriesAndTerritories", y="casesPerMillion")

    # Highlight the selected country. The axes is fresh and holds a single
    # bar series, so the row position in largestData is used as the index
    # into ax.patches (the DataFrame's own index labels are unrelated
    # row numbers of self.df and must not be used here).
    for position, name in enumerate(largestData['countriesAndTerritories']):
        if name == country:
            ax.patches[position].set_facecolor('r')

    if showPlot:
        plt.show(block=True)

    filePath = ('graphs/casesPerMillionGraph_' + country + '_' + datetime.now().strftime('%Y-%m-%d'))
    fig.savefig(filePath)
    plt.close()
    return filePath
def getDeathsPerMillionGraph(self, country, showPlot=False) -> str:
    """
    Draw a bar chart of total deaths per million inhabitants and save it.

    The chart shows the 20 countries with the highest value on the last
    date returned by ``getAvailableDates()``. If the selected country is
    not among them, it is added as an extra bar. The bar of the selected
    country is coloured red.

    :param country: value of 'countriesAndTerritories' to highlight
    :param showPlot: if True, show the plot in a blocking window before saving
    :return: the path that was passed to ``fig.savefig``
    """
    fig = plt.figure()
    fig.dpi = 200.0
    ax = fig.add_subplot(111)
    plt.title('Deaths per Million Citizens in ' + country + ' compared to top 20')

    # Query the available dates once and take the last entry.
    availableDates = self.getAvailableDates()
    date = availableDates[len(availableDates) - 1]

    timeData = self.df.loc[self.df['dateRep'] == date]
    timeData = timeData.sort_values('countriesAndTerritories')
    timeData['deathsPerMillion'] = ((timeData['totalDeaths'] / timeData['popData2018']) * 1000000)

    largestData = timeData.nlargest(20, 'deathsPerMillion')
    if country not in largestData['countriesAndTerritories'].unique():
        # DataFrame.append no longer exists in pandas 2.x; concat is the
        # equivalent way to add the selected country's row(s) at the end.
        countryRows = timeData.loc[timeData['countriesAndTerritories'] == country]
        largestData = pd.concat([largestData, countryRows])

    largestData.plot.bar(ax=ax, x="countriesAndTerritories", y="deathsPerMillion")

    # Highlight the selected country. The axes is fresh and holds a single
    # bar series, so the row position in largestData is used as the index
    # into ax.patches (the DataFrame's own index labels are unrelated
    # row numbers of self.df and must not be used here).
    for position, name in enumerate(largestData['countriesAndTerritories']):
        if name == country:
            ax.patches[position].set_facecolor('r')

    if showPlot:
        plt.show(block=True)

    filePath = ('graphs/deathsPerMillionGraph_' + country + '_' + datetime.now().strftime('%Y-%m-%d'))
    fig.savefig(filePath)
    plt.close()
    return filePath

View File

@ -5,10 +5,8 @@ Project: Analyse worldwide COVID-19 Data and provide graphs etc.
@author Patrick Müller
"""
import DataFetcher as fetcher
import Analyser as ana
import UserInterface as UI
if __name__ == '__main__':
fetcher.updateStatsFile()
#ana.Analyser().getIsItOverGraph('Germany', showPlot=True)
UI.main()

View File

@ -78,8 +78,8 @@ class UserInterface(QMainWindow):
labelBox.addWidget(graphTypePickerLabel)
self.graphTypePicker = QComboBox(parent=self)
self.graphTypePicker.addItems(
['Total Cases', 'Case Increase', 'Increase Percentage', 'Total Deaths', 'Death Increase', 'Death Rate',
'Is it going to end soon?'])
['Total Cases', 'Case Increase', 'Increase Percentage', 'Cases per Million', 'Total Deaths',
'Death Increase', 'Death Rate', 'Deaths per Million', 'Is it going to end soon?'])
picklistBox.addWidget(self.graphTypePicker, 1)
# Calculate Button
@ -140,7 +140,9 @@ class UserInterface(QMainWindow):
deathGraphPath = self.analyser.getDeathGraph(country, startDate, endDate)
deathIncreaseGraphPath = self.analyser.getDeathIncreaseGraph(country, startDate, endDate)
deathRateGraphPath = self.analyser.getDailyDeathRateGraph(country, startDate, endDate)
isItOverGraph = self.analyser.getIsItOverGraph(country)
isItOverGraphPath = self.analyser.getIsItOverGraph(country)
casesPerMillionGraphPath = self.analyser.getCasesPerMillionGraph(country)
deathsPerMillionGraphPath = self.analyser.getDeathsPerMillionGraph(country)
self.graphPlaceHolder = QLabel(self)
self.clearLayout(self.graphBox)
self.graphBox.addWidget(self.graphPlaceHolder)
@ -158,7 +160,11 @@ class UserInterface(QMainWindow):
elif self.graphTypePicker.currentText() == 'Death Rate':
self.graphPlaceHolder.setPixmap(QPixmap(deathRateGraphPath))
elif self.graphTypePicker.currentText() == 'Is it going to end soon?':
self.graphPlaceHolder.setPixmap(QPixmap(isItOverGraph))
self.graphPlaceHolder.setPixmap(QPixmap(isItOverGraphPath))
elif self.graphTypePicker.currentText() == 'Cases per Million':
self.graphPlaceHolder.setPixmap(QPixmap(casesPerMillionGraphPath))
elif self.graphTypePicker.currentText() == 'Deaths per Million':
self.graphPlaceHolder.setPixmap(QPixmap(deathsPerMillionGraphPath))
def clearLayout(self, layout):
for i in reversed(range(layout.count())):

File diff suppressed because it is too large Load Diff