🎉 Initial commit

2021-02-28 13:11:14 +01:00 · 2021-02-28 13:11:14 +01:00 · 430769bd33
commit 430769bd33
4 changed files with 249 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,6 @@
+# DHBW-RaPla-Vorratsdatenspeicherung
+
+Dieses Projekt dient dazu, den Vorlesungsplan von TINF19B4 zu vorratsdatenspeichern.
+Dazu wird das Script stündlich automatisiert aufgerufen und speichert jede Änderung an Vorlesungen
+in SQL ab. Dadurch können auch Details von Events, die bereits aus RaPla gelöscht wurden, abgerufen werden und
+zusätzlich haben wir eine Übersicht über alle Änderungen.
--- a/SQLConnectionHandler.py
+++ b/SQLConnectionHandler.py
@ -0,0 +1,32 @@
+import pymysql
+import os
+import logging
+
+def getConnection() -> pymysql.Connection:
+	"""
+	Get a connection to the 'DHBW-RaPla-Vorratsdatenspeicherung' SQL database.
+	If the environment variable IS_VSERVER is 'true', the database on localhost is used with the
+	vServer credentials. Otherwise (also when IS_VSERVER is not set at all) the host from SQL_SERVER
+	is used with the PADDY_SQL_* credentials.
+	@return: pymysql connection object, or None if pymysql reported an error while connecting
+	@raise KeyError: if one of the required credential/host environment variables is not set
+	"""
+	# .get() instead of [] so that a missing IS_VSERVER falls through to the remote settings
+	# instead of aborting with a KeyError
+	if os.environ.get('IS_VSERVER') == 'true':
+		user = os.environ['vServer_SQL_User']
+		password = os.environ['vServer_SQL_Password']
+		host = 'localhost'
+	else:
+		user = os.environ['PADDY_SQL_USER']
+		password = os.environ['PADDY_SQL_PASSWORD']
+		host = os.environ['SQL_SERVER']
+
+	try:
+		# Only the connect call can raise pymysql.Error, so only it is guarded
+		return pymysql.connect(
+			user=user,
+			password=password,
+			host=host,
+			port=3306,
+			database='DHBW-RaPla-Vorratsdatenspeicherung'
+		)
+	except pymysql.Error as e:
+		# Callers receive None and have to check for it themselves
+		logging.error('SQL Connection error: %s', e)
+		return None
--- a/main.py
+++ b/main.py
@ -0,0 +1,209 @@
+from collections import ChainMap
+
+from icalevents import icalevents
+import sys
+from httplib2 import Http
+from datetime import datetime, timedelta
+import SQLConnectionHandler
+
+
+def crawl():
+	"""
+	Fetch the TINF19B4 calendar from RaPla, compare it with the latest state stored in SQL and
+	write new events, changed events and deletions back to SQL.
+	"""
+	# On Windows the HTTP client is created with SSL certificate validation disabled
+	if sys.platform == 'win32':
+		http = Http(disable_ssl_certificate_validation=True)
+	else:
+		http = Http()
+
+	#    ______     __                                                __
+	#   / ____/__  / /_   ____  ___ _      __   ___ _   _____  ____  / /______
+	#  / / __/ _ \/ __/  / __ \/ _ \ | /| / /  / _ \ | / / _ \/ __ \/ __/ ___/
+	# / /_/ /  __/ /_   / / / /  __/ |/ |/ /  /  __/ |/ /  __/ / / / /_(__  )
+	# \____/\___/\__/  /_/ /_/\___/|__/|__/   \___/|___/\___/_/ /_/\__/____/
+
+	# Get all events between 2010-01-01 and one year from now from RaPla
+	events = icalevents.events(url='https://rapla.dhbw-karlsruhe.de/rapla?page=ical&user=eisenbiegler&file=TINF19B4',
+							   http=http, start=datetime.strptime('2010-01-01', '%Y-%m-%d'),
+							   end=datetime.now() + timedelta(days=365))
+
+	#    ______     __     __      __            __         __      __           ____                        _____ ____    __
+	#   / ____/__  / /_   / /___ _/ /____  _____/ /_   ____/ /___ _/ /_____ _   / __/________  ____ ___     / ___// __ \  / /
+	#  / / __/ _ \/ __/  / / __ `/ __/ _ \/ ___/ __/  / __  / __ `/ __/ __ `/  / /_/ ___/ __ \/ __ `__ \    \__ \/ / / / / /
+	# / /_/ /  __/ /_   / / /_/ / /_/  __(__  ) /_   / /_/ / /_/ / /_/ /_/ /  / __/ /  / /_/ / / / / / /   ___/ / /_/ / / /___
+	# \____/\___/\__/  /_/\__,_/\__/\___/____/\__/   \__,_/\__,_/\__/\__,_/  /_/ /_/   \____/_/ /_/ /_/   /____/\___\_\/_____/
+
+	# Select the UIDs of all events that are already stored in the rapla_entries table
+	conn = SQLConnectionHandler.getConnection()
+	# NOTE(review): getConnection() returns None when pymysql reports a connection error; the next
+	# line would then fail with an AttributeError
+	cur = conn.cursor()
+	cur.execute("SELECT uid FROM rapla_entries")
+	uids = cur.fetchall()
+	uids = list(x[0] for x in uids)  # Otherwise, uids would be a list of tuples
+	# Fetch the most recent changeset (highest change_id) of every entry together with the entry's uid
+	cur.execute("""
+		WITH summary AS (
+			SELECT ch.*,
+				   ROW_NUMBER() OVER(PARTITION BY ch.entry_id
+										 ORDER BY ch.change_id DESC) AS rk
+			  FROM rapla_changes ch)
+		SELECT s.*, entr.uid
+		FROM summary s
+		LEFT OUTER JOIN rapla_entries entr ON s.entry_id = entr.entry_id
+		WHERE s.rk = 1;
+	""")
+	changes = cur.fetchall()
+
+	#     ____                                         __      __
+	#    / __ \________  ____  ____ _________     ____/ /___ _/ /_____ _
+	#   / /_/ / ___/ _ \/ __ \/ __ `/ ___/ _ \   / __  / __ `/ __/ __ `/
+	#  / ____/ /  /  __/ /_/ / /_/ / /  /  __/  / /_/ / /_/ / /_/ /_/ /
+	# /_/   /_/   \___/ .___/\__,_/_/   \___/   \__,_/\__,_/\__/\__,_/
+	#                /_/
+
+	# The following line builds a dict with the event uid as key and the whole row tuple as value.
+	# Say we have the list of tuples [(1, 'abc'), (2, 'def')] where abc and def are the uids. Then it would generate
+	# a dict in the form of {'abc': (1, 'abc'), 'def': (2, 'def')}
+	# The map function returns one small dict per row and the ChainMap combines them into one big dict
+	# Index 15 is read as the uid here; presumably it is the entr.uid column at the end of the row, which
+	# depends on the column order of rapla_changes - TODO confirm against the table schema
+	changeDict = dict(ChainMap(*map(lambda x: {x[15]: x}, changes)))
+
+	# Change booleans back to boolean values as they come from SQL as integers
+	# Index 3 is used as the "is deleted" flag further down and index 13 is compared with event.recurring;
+	# presumably these are the isDeleted and new_recurring columns - TODO confirm against the table schema
+	for change in changeDict:
+		workingList = list(changeDict[change])
+		workingList[13] = changeDict[change][13] == 1
+		workingList[3] = changeDict[change][3] == 1
+		changeDict[change] = tuple(workingList)
+
+	newEvents = []
+	updatedEvents = []
+
+	# Append number of recurring event to UID so we can tell them apart
+	# Also removes the timezone from all the datetime objects
+	# evtIdx maps the original uid to the number of occurrences seen so far
+	evtIdx = {}
+	for event in events:
+		if not event.uid in evtIdx.keys():
+			# Event not known yet
+			uid = event.uid
+			event.uid = uid + '---0'
+			evtIdx[uid] = 1
+		else:
+			uid = event.uid
+			event.uid = uid + '---' + str(evtIdx[uid])
+			evtIdx[uid] += 1
+		# Remove timezones
+		# NOTE(review): removeTimezone calls strftime on its argument, so this assumes all four
+		# attributes are always set - confirm that icalevents never yields None here
+		event.start = removeTimezone(event.start)
+		event.end = removeTimezone(event.end)
+		event.last_modified = removeTimezone(event.last_modified)
+		event.created = removeTimezone(event.created)
+
+	#    ________              __      ____                   __
+	#   / ____/ /_  ___  _____/ /__   / __/___  _____   _____/ /_  ____ _____  ____ ____  _____
+	#  / /   / __ \/ _ \/ ___/ //_/  / /_/ __ \/ ___/  / ___/ __ \/ __ `/ __ \/ __ `/ _ \/ ___/
+	# / /___/ / / /  __/ /__/ ,<    / __/ /_/ / /     / /__/ / / / /_/ / / / / /_/ /  __(__  )
+	# \____/_/ /_/\___/\___/_/|_|  /_/  \____/_/      \___/_/ /_/\__,_/_/ /_/\__, /\___/____/
+	#                                                                       /____/
+
+	for event in events:
+		if not event.uid in uids:
+			# New event, create event entry and new changeset
+			if not event.uid in list(x.get('uid') for x in newEvents):
+				# Only add to list if this event is not in the list yet (can happen in case of recurring events)
+				evt = {
+					# TODO add checks for existing values
+					"uid": event.uid,
+					"isDeleted": False,
+					"new_summary": event.summary,
+					"new_description": event.description,
+					"new_start": event.start,
+					"new_end": event.end,
+					"new_last_modified": event.last_modified,
+					"new_created": event.created,
+					"new_location": event.location,
+					"new_organizer": event.organizer,
+					"new_categories": event.categories[0] if event.categories else '',
+					"new_recurring": event.recurring
+				}
+				newEvents.append(evt)
+		else:
+			# Event is known, create new changeset
+			hasChanges = False
+			# NOTE(review): .get() returns None if there is no changeset for this uid; the slice below
+			# would then fail with a TypeError
+			latestKnownState = changeDict.get(event.uid)
+			changeSet = latestKnownState[4:14]  # Relevant subset of latest known state data
+			# Same field order as changeSet, plus the uid as last element; the uid is not compared but
+			# fills the last placeholder of the INSERT further down
+			newEventTuple = tuple(
+				[event.summary, event.description, event.start, event.end, event.last_modified, event.created,
+				 event.location, event.organizer, (event.categories[0] if event.categories else ''), event.recurring,
+				 event.uid])
+
+			# Check every value by looping over both the old and new tuples
+			for dataIndex in range(0, 10):
+				if newEventTuple[dataIndex] != changeSet[dataIndex]:
+					hasChanges = True
+
+			if hasChanges:
+				updatedEvents.append(newEventTuple)
+
+	# Now also check for deleted events
+	deletedEvents = []
+	for uid in uids:
+		# We have to check for every known uid if it still exists
+		# The following condition may be a bit overwhelming but it basically checks three things:
+		#	1. Is the uid in SQL but not in the list of fetched events? -> Has been deleted from RaPla?
+		#	2. If there is already a changeset for this event, is the latest known state that it is not deleted?
+		#	3. If there is no changeset for it yet, we can't check the latest known state so we just set it to deleted
+		#		-> this basically can't ever happen with real data but it happened during testing and it doesn't hurt to leave it in here
+		if uid not in list(x.uid for x in events) and (uid in changeDict.keys() and not changeDict[uid][3] or uid not in changeDict.keys()):
+			# Only insert if there is no 'deleted' record yet
+			deletedEvents.append(tuple([uid]))
+
+	#  _       __     _ __          __               __      __           _____ ____    __
+	# | |     / /____(_) /____     / /_  ____ ______/ /__   / /_____     / ___// __ \  / /
+	# | | /| / / ___/ / __/ _ \   / __ \/ __ `/ ___/ //_/  / __/ __ \    \__ \/ / / / / /
+	# | |/ |/ / /  / / /_/  __/  / /_/ / /_/ / /__/ ,<    / /_/ /_/ /   ___/ / /_/ / / /___
+	#  __/|__/_/  /_/\__/\___/  /_.___/\__,_/\___/_/|_|   \__/\____/   /____/\___\_\/_____/
+
+	# Insert new events into rapla_entries table
+	if newEvents:
+		# Insert new events if there are any
+		cur.executemany("INSERT INTO rapla_entries (uid, initialSummary) VALUES (%s, %s)",
+						list((x.get('uid'), x.get('new_summary')) for x in newEvents))
+		# Committed before the changesets are written; the sub-select below looks the new entries up by uid
+		conn.commit()
+		# The uid comes last because it fills the sub-select that resolves the entry_id
+		changeData = list(
+			(x.get('isDeleted'), x.get('new_summary'), x.get('new_description'), x.get('new_start'), x.get('new_end'),
+			 x.get('new_last_modified'),
+			 x.get('new_created'), x.get('new_location'), x.get('new_organizer'), x.get('new_categories'),
+			 x.get('new_recurring'), x.get('uid')
+			 ) for x in newEvents)
+		changeQuery = """
+			INSERT INTO rapla_changes
+			(isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
+			VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
+		"""
+		cur.executemany(changeQuery, changeData)
+		conn.commit()
+
+	if updatedEvents:
+		# Insert changes to existing events if there are any
+		changeQuery = """
+			INSERT INTO rapla_changes
+			(isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
+			VALUES (False, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
+		"""
+		cur.executemany(changeQuery, updatedEvents)
+		conn.commit()
+
+	if deletedEvents:
+		# Write a changeset with isDeleted = 1 for every known uid that is no longer in the fetched events
+		cur.executemany(
+			"INSERT INTO rapla_changes (entry_id, isDeleted) VALUES ((SELECT entry_id FROM rapla_entries WHERE uid = %s), 1)",
+			deletedEvents)
+		conn.commit()
+
+	cur.close()
+	conn.close()
+
+
+def removeTimezone(date: datetime) -> datetime:
+	"""
+	Returns a copy of a datetime object without its timezone information.
+	The value is written out as text with second precision and parsed back again, so the
+	wall-clock date and time stay as they are (no conversion to another timezone) while
+	the timezone and any microseconds are dropped.
+	:param date: The datetime object to adjust
+	:return: A new datetime object without timezone
+	"""
+	secondPrecision = '%Y-%m-%d %H:%M:%S'
+	asText = date.strftime(secondPrecision)
+	return datetime.strptime(asText, secondPrecision)
+
+
+# Run one crawl when this file is executed as a script (not when it is imported)
+if __name__ == "__main__":
+	crawl()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+pymysql
+git+https://github.com/irgangla/icalevents@master#egg=icalevents