From 430769bd333e51da96184ffb3ff1e45ce740feaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20M=C3=BCller?=
Date: Sun, 28 Feb 2021 13:11:14 +0100
Subject: [PATCH] :tada: Initial commit

---
 README.md               |   6 +
 SQLConnectionHandler.py |  41 +++++++
 main.py                 | 238 ++++++++++++++++++++++++++++++++++++++++
 requirements.txt        |   2 +
 4 files changed, 287 insertions(+)
 create mode 100644 README.md
 create mode 100644 SQLConnectionHandler.py
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2b87296
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+# DHBW-RaPla-Vorratsdatenspeicherung
+
+Dieses Projekt dient dazu, den Vorlesungsplan von TINF19B4 zu vorratsdatenspeichern.
+Dazu wird das Script stündlich automatisiert aufgerufen und speichert jede Änderung an Vorlesungen
+in SQL ab. Dadurch können auch Details von Events, die bereits aus RaPla gelöscht wurden, abgerufen werden und
+zusätzlich haben wir eine Übersicht über alle Änderungen.
\ No newline at end of file
diff --git a/SQLConnectionHandler.py b/SQLConnectionHandler.py
new file mode 100644
index 0000000..069fc6f
--- /dev/null
+++ b/SQLConnectionHandler.py
@@ -0,0 +1,41 @@
+import pymysql
+import os
+import logging
+from typing import Optional
+
+
+def getConnection() -> Optional[pymysql.Connection]:
+    """
+    Get a connection to SQL.
+
+    On the vServer (environment variable IS_VSERVER set to 'true') the SQL server on localhost is
+    used with the vServer credentials. Everywhere else, e.g. for local testing, the server named in
+    SQL_SERVER is used with the PADDY_SQL_* credentials. A missing IS_VSERVER counts as not 'true'.
+    @return: pymysql connection object or None if the connection could not be established
+    """
+    try:
+        if os.environ.get('IS_VSERVER') == 'true':
+            conn = pymysql.connect(
+                user=os.environ['vServer_SQL_User'],
+                password=os.environ['vServer_SQL_Password'],
+                host='localhost',
+                port=3306,
+                database='DHBW-RaPla-Vorratsdatenspeicherung'
+            )
+        else:
+            conn = pymysql.connect(
+                user=os.environ['PADDY_SQL_USER'],
+                password=os.environ['PADDY_SQL_PASSWORD'],
+                host=os.environ['SQL_SERVER'],
+                port=3306,
+                database='DHBW-RaPla-Vorratsdatenspeicherung'
+            )
+
+        return conn
+    except KeyError as e:
+        # A missing credential variable is reported the same way as a failed connection
+        logging.error('SQL Connection error: missing environment variable %s', e)
+        return None
+    except pymysql.Error as e:
+        logging.error('SQL Connection error: %s', e)
+        return None
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..c59f5a8
--- /dev/null
+++ b/main.py
@@ -0,0 +1,238 @@
+from collections import ChainMap
+
+from icalevents import icalevents
+import sys
+from httplib2 import Http
+from datetime import datetime, timedelta
+from typing import Optional
+import SQLConnectionHandler
+
+
+def crawl():
+    """
+    Fetches the lecture schedule from RaPla and stores every change since the last run in SQL
+
+    New events get an entry in rapla_entries plus a first changeset in rapla_changes, changed events
+    get a new changeset and events that vanished from RaPla get a changeset marking them as deleted.
+    :raises RuntimeError: If no SQL connection could be established
+    """
+    if sys.platform == 'win32':
+        # NOTE(review): certificate validation is switched off on Windows - presumably for local testing only
+        http = Http(disable_ssl_certificate_validation=True)
+    else:
+        http = Http()
+
+    # ===== Get new events =====
+
+    # Get all events from 2010 up to one year from now from RaPla
+    events = icalevents.events(url='https://rapla.dhbw-karlsruhe.de/rapla?page=ical&user=eisenbiegler&file=TINF19B4',
+                               http=http, start=datetime.strptime('2010-01-01', '%Y-%m-%d'),
+                               end=datetime.now() + timedelta(days=365))
+
+    conn = SQLConnectionHandler.getConnection()
+    if conn is None:
+        # getConnection returns None instead of raising if the connection fails
+        raise RuntimeError('Could not connect to SQL')
+
+    try:
+        with conn.cursor() as cur:
+            uids, changeDict = _loadKnownState(cur)
+            _prepareEvents(events)
+            newEvents, updatedEvents, deletedEvents = _detectChanges(events, uids, changeDict)
+            _writeChanges(conn, cur, newEvents, updatedEvents, deletedEvents)
+    finally:
+        # Close the connection even if one of the steps above fails
+        conn.close()
+
+
+def _loadKnownState(cur):
+    """
+    Loads the known event uids and the latest changeset of every event from SQL
+    :param cur: An open SQL cursor
+    :return: Tuple of the list of known uids and a dict with the event uid as key and the latest
+        rapla_changes row of that event as value
+    """
+    # ===== Get latest data from SQL =====
+
+    # Select the UIDs of all events that are already known in SQL
+    cur.execute("SELECT uid FROM rapla_entries")
+    uids = [x[0] for x in cur.fetchall()]  # Otherwise, uids would be a list of tuples
+    cur.execute("""
+        WITH summary AS (
+            SELECT ch.*,
+                ROW_NUMBER() OVER(PARTITION BY ch.entry_id
+                    ORDER BY ch.change_id DESC) AS rk
+            FROM rapla_changes ch)
+        SELECT s.*, entr.uid
+        FROM summary s
+        LEFT OUTER JOIN rapla_entries entr ON s.entry_id = entr.entry_id
+        WHERE s.rk = 1;
+    """)
+
+    # ===== Prepare data =====
+
+    # The following loop simply creates a dict with the event uid as key and all other info as value from the tuples
+    # Say we have the list of tuples [(1, 'abc'), (2, 'def')] where abc and def are the uids. Then the loop would
+    # generate a dict in the form of {'abc': (1, 'abc'), 'def': (2, 'def')}. In the real rows, the uid is at index 15
+    changeDict = {}
+    for change in cur.fetchall():
+        workingList = list(change)
+        # Change booleans back to boolean values as they come from SQL as integers
+        # (judging from their use further down, index 3 is the deleted flag and index 13 the recurring flag)
+        workingList[13] = change[13] == 1
+        workingList[3] = change[3] == 1
+        # setdefault keeps the first row should a uid ever show up more than once
+        changeDict.setdefault(change[15], tuple(workingList))
+    return uids, changeDict
+
+
+def _prepareEvents(events):
+    """
+    Makes the uids of recurring events unique and removes the timezone from all dates (in place)
+    :param events: The events fetched from RaPla
+    """
+    evtIdx = {}  # Number of events seen so far per original uid
+    for event in events:
+        # Append number of recurring event to UID so we can tell them apart
+        uid = event.uid
+        occurrence = evtIdx.get(uid, 0)
+        event.uid = uid + '---' + str(occurrence)
+        evtIdx[uid] = occurrence + 1
+        # Remove timezones
+        event.start = removeTimezone(event.start)
+        event.end = removeTimezone(event.end)
+        event.last_modified = removeTimezone(event.last_modified)
+        event.created = removeTimezone(event.created)
+
+
+def _detectChanges(events, uids, changeDict):
+    """
+    Compares the fetched events with the latest known state from SQL
+    :param events: The prepared events fetched from RaPla
+    :param uids: The uids that are known in SQL
+    :param changeDict: The latest known changeset per uid
+    :return: Tuple of the new events (dicts), the updated events (tuples) and the deleted events (1-tuples of the uid)
+    """
+    # ===== Check for changes =====
+
+    knownUids = set(uids)  # Sets keep the membership checks below cheap
+    newUids = set()
+    newEvents = []
+    updatedEvents = []
+
+    for event in events:
+        category = event.categories[0] if event.categories else ''
+        if event.uid not in knownUids:
+            # New event, create event entry and new changeset
+            if event.uid not in newUids:
+                # Only add to list if this event is not in the list yet (can happen in case of recurring events)
+                newUids.add(event.uid)
+                newEvents.append({
+                    # TODO add checks for existing values
+                    "uid": event.uid,
+                    "isDeleted": False,
+                    "new_summary": event.summary,
+                    "new_description": event.description,
+                    "new_start": event.start,
+                    "new_end": event.end,
+                    "new_last_modified": event.last_modified,
+                    "new_created": event.created,
+                    "new_location": event.location,
+                    "new_organizer": event.organizer,
+                    "new_categories": category,
+                    "new_recurring": event.recurring
+                })
+        else:
+            # Event is known, create new changeset if anything changed
+            newEventTuple = (
+                event.summary, event.description, event.start, event.end, event.last_modified, event.created,
+                event.location, event.organizer, category, event.recurring, event.uid)
+            latestKnownState = changeDict.get(event.uid)
+            if latestKnownState is None:
+                # Entry without any changeset: nothing to compare with, so just store the current state
+                updatedEvents.append(newEventTuple)
+                continue
+            changeSet = latestKnownState[4:14]  # Relevant subset of latest known state data
+            # Check every value by looping over both the old and new tuples (zip stops before the uid)
+            if any(new != old for new, old in zip(newEventTuple, changeSet)):
+                updatedEvents.append(newEventTuple)
+
+    # Now also check for deleted events
+    fetchedUids = {x.uid for x in events}
+    deletedEvents = []
+    for uid in uids:
+        # We have to check for every known uid if it still exists
+        if uid in fetchedUids:
+            continue
+        # The uid is in SQL but not in the list of fetched events, so it has been deleted from RaPla.
+        # Only insert if there is no 'deleted' record yet, i.e. the latest known state is not deleted.
+        # If there is no changeset for it yet, we can't check the latest known state so we just set it to deleted
+        # -> this basically can't ever happen with real data but it happened during testing and it doesn't hurt
+        latestKnownState = changeDict.get(uid)
+        if latestKnownState is None or not latestKnownState[3]:
+            deletedEvents.append((uid,))
+
+    return newEvents, updatedEvents, deletedEvents
+
+
+def _writeChanges(conn, cur, newEvents, updatedEvents, deletedEvents):
+    """
+    Writes the detected changes back to SQL
+    :param conn: The SQL connection, used to commit
+    :param cur: An open cursor of that connection
+    :param newEvents: Events that are not known in SQL yet
+    :param updatedEvents: Current states of known events that have changed
+    :param deletedEvents: Uids of events that have been deleted from RaPla
+    """
+    # ===== Write back to SQL =====
+
+    if newEvents:
+        # Insert new events into rapla_entries table first because the changesets look up the entry_id
+        cur.executemany("INSERT INTO rapla_entries (uid, initialSummary) VALUES (%s, %s)",
+                        [(x.get('uid'), x.get('new_summary')) for x in newEvents])
+        conn.commit()
+        changeData = [
+            (x.get('isDeleted'), x.get('new_summary'), x.get('new_description'), x.get('new_start'), x.get('new_end'),
+             x.get('new_last_modified'),
+             x.get('new_created'), x.get('new_location'), x.get('new_organizer'), x.get('new_categories'),
+             x.get('new_recurring'), x.get('uid')
+             ) for x in newEvents]
+        changeQuery = """
+            INSERT INTO rapla_changes
+            (isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
+        """
+        cur.executemany(changeQuery, changeData)
+        conn.commit()
+
+    if updatedEvents:
+        # Insert changes to existing events if there are any
+        changeQuery = """
+            INSERT INTO rapla_changes
+            (isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
+            VALUES (False, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
+        """
+        cur.executemany(changeQuery, updatedEvents)
+        conn.commit()
+
+    if deletedEvents:
+        cur.executemany(
+            "INSERT INTO rapla_changes (entry_id, isDeleted) VALUES ((SELECT entry_id FROM rapla_entries WHERE uid = %s), 1)",
+            deletedEvents)
+        conn.commit()
+
+
+def removeTimezone(date: Optional[datetime]) -> Optional[datetime]:
+    """
+    Removes the timezone part of a datetime object (fractions of a second are dropped as well)
+    :param date: The datetime object to adjust, may be None
+    :return: The adjusted object or None if no date was given
+    """
+    if date is None:
+        # NOTE(review): presumably not every event carries all four dates, so a missing one is passed through
+        return None
+    return date.replace(tzinfo=None, microsecond=0)
+
+
+if __name__ == "__main__":
+    crawl()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ea5248e
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+pymysql
+git+https://github.com/irgangla/icalevents@master#egg=icalevents
\ No newline at end of file