🎉 Initial commit

This commit is contained in:
Patrick Müller 2021-02-28 13:11:14 +01:00
commit 430769bd33
4 changed files with 249 additions and 0 deletions

6
README.md Normal file
View File

@ -0,0 +1,6 @@
# DHBW-RaPla-Vorratsdatenspeicherung
Dieses Projekt dient dazu, den Vorlesungsplan von TINF19B4 zu vorratsdatenspeichern.
Dazu wird das Script stündlich automatisiert aufgerufen und speichert jede Änderung an Vorlesungen
in SQL ab. Dadurch können auch Details von Events, die bereits aus RaPla gelöscht wurden, abgerufen werden und
zusätzlich haben wir eine Übersicht über alle Änderungen.

32
SQLConnectionHandler.py Normal file
View File

@ -0,0 +1,32 @@
import pymysql
import os
import logging
def getConnection() -> pymysql.Connection:
    """
    Get a connection to the 'DHBW-RaPla-Vorratsdatenspeicherung' SQL database.

    This function is used both on the vServer and for local testing. Which
    credentials are used is decided by the environment variable IS_VSERVER:

    - IS_VSERVER == 'true': connect to 'localhost' with vServer_SQL_User /
      vServer_SQL_Password
    - anything else (including IS_VSERVER not being set at all): connect to the
      host named by SQL_SERVER with PADDY_SQL_USER / PADDY_SQL_PASSWORD

    @return: pymysql connection object, or None if pymysql reported an error
             while connecting (the error is logged)
    @raise KeyError: if one of the credential variables of the selected branch is not set
    """
    # .get() instead of [] so that a missing flag selects the non-vServer branch
    # instead of aborting with a KeyError
    if os.environ.get('IS_VSERVER') == 'true':
        user = os.environ['vServer_SQL_User']
        password = os.environ['vServer_SQL_Password']
        host = 'localhost'
    else:
        user = os.environ['PADDY_SQL_USER']
        password = os.environ['PADDY_SQL_PASSWORD']
        host = os.environ['SQL_SERVER']

    try:
        return pymysql.connect(
            user=user,
            password=password,
            host=host,
            port=3306,
            database='DHBW-RaPla-Vorratsdatenspeicherung'
        )
    except pymysql.Error as e:
        logging.error('SQL Connection error: %s', e)
        return None

209
main.py Normal file
View File

@ -0,0 +1,209 @@
from collections import ChainMap
from icalevents import icalevents
import sys
from httplib2 import Http
from datetime import datetime, timedelta
import SQLConnectionHandler
def _eventFields(event) -> tuple:
    """Return the ten tracked attributes of an event in the order of the rapla_changes columns."""
    return (
        event.summary, event.description, event.start, event.end, event.last_modified, event.created,
        event.location, event.organizer, (event.categories[0] if event.categories else ''), event.recurring,
    )


def _normalizeEvents(events) -> None:
    """Make event uids unique by appending '---<n>' and remove the timezone from all datetimes (in place)."""
    # Occurrences of a recurring event share one uid, so the running number of
    # the occurrence is appended to be able to tell them apart
    evtIdx = {}
    for event in events:
        uid = event.uid
        occurrence = evtIdx.get(uid, 0)
        event.uid = uid + '---' + str(occurrence)
        evtIdx[uid] = occurrence + 1
        # Remove timezones
        event.start = removeTimezone(event.start)
        event.end = removeTimezone(event.end)
        event.last_modified = removeTimezone(event.last_modified)
        event.created = removeTimezone(event.created)


def _loadKnownState(cur):
    """Read the known event uids and the latest changeset of every entry from SQL."""
    cur.execute("SELECT uid FROM rapla_entries")
    uids = [row[0] for row in cur.fetchall()]  # fetchall() returns a list of 1-tuples
    cur.execute("""
        WITH summary AS (
        SELECT ch.*,
        ROW_NUMBER() OVER(PARTITION BY ch.entry_id
        ORDER BY ch.change_id DESC) AS rk
        FROM rapla_changes ch)
        SELECT s.*, entr.uid
        FROM summary s
        LEFT OUTER JOIN rapla_entries entr ON s.entry_id = entr.entry_id
        WHERE s.rk = 1;
    """)
    # Build a dict with the event uid (column 15) as key and the whole row as value.
    # Say we have the rows [(1, 'abc'), (2, 'def')] where abc and def are the uids,
    # then the result is {'abc': (1, 'abc'), 'def': (2, 'def')}.
    changeDict = {}
    for row in cur.fetchall():
        state = list(row)
        # Change booleans back to boolean values as they come from SQL as integers
        # (column 3 is used as the deleted flag, column 13 is compared against event.recurring)
        state[3] = row[3] == 1
        state[13] = row[13] == 1
        # setdefault: if a uid shows up more than once, the first row wins
        changeDict.setdefault(row[15], tuple(state))
    return uids, changeDict


def _diffEvents(events, uids, changeDict):
    """Compare the fetched events with the known state and return (newEvents, updatedEvents, deletedEvents)."""
    knownUids = set(uids)
    fetchedUids = {event.uid for event in events}

    newEvents = []  # (uid, fields) of events that have no rapla_entries row yet
    newUids = set()
    updatedEvents = []  # fields + (uid,) of known events whose data differs from the latest changeset
    for event in events:
        fields = _eventFields(event)
        if event.uid not in knownUids:
            # New event, create event entry and new changeset
            # (only once per uid, in case the same uid is fetched twice)
            if event.uid not in newUids:
                newUids.add(event.uid)
                newEvents.append((event.uid, fields))
            continue
        # Event is known, create a new changeset if anything differs
        latestKnownState = changeDict.get(event.uid)
        if latestKnownState is None:
            # Entry without any changeset: nothing to compare against, so record the current state
            updatedEvents.append(fields + (event.uid,))
            continue
        changeSet = latestKnownState[4:14]  # Relevant subset of the latest known state
        if any(new != old for new, old in zip(fields, changeSet)):
            updatedEvents.append(fields + (event.uid,))

    # Now also check for deleted events: a uid is reported as deleted if it is
    # known in SQL but was not fetched, unless the latest changeset already says
    # 'deleted'. A uid without any changeset is reported as deleted as well.
    deletedEvents = []
    for uid in uids:
        if uid in fetchedUids:
            continue
        latestKnownState = changeDict.get(uid)
        if latestKnownState is None or not latestKnownState[3]:
            deletedEvents.append((uid,))
    return newEvents, updatedEvents, deletedEvents


def _storeChanges(conn, cur, newEvents, updatedEvents, deletedEvents) -> None:
    """Write new entries and all changesets back to SQL, committing after every batch."""
    if newEvents:
        # Insert new events into the rapla_entries table first, the changesets reference them
        cur.executemany("INSERT INTO rapla_entries (uid, initialSummary) VALUES (%s, %s)",
                        [(uid, fields[0]) for uid, fields in newEvents])
        conn.commit()
        changeQuery = """
            INSERT INTO rapla_changes
            (isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
        """
        cur.executemany(changeQuery, [(False,) + fields + (uid,) for uid, fields in newEvents])
        conn.commit()
    if updatedEvents:
        # Insert changes to existing events if there are any
        changeQuery = """
            INSERT INTO rapla_changes
            (isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
            VALUES (False, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
        """
        cur.executemany(changeQuery, updatedEvents)
        conn.commit()
    if deletedEvents:
        cur.executemany(
            "INSERT INTO rapla_changes (entry_id, isDeleted) VALUES ((SELECT entry_id FROM rapla_entries WHERE uid = %s), 1)",
            deletedEvents)
        conn.commit()


def crawl():
    """
    Fetch the RaPla calendar of TINF19B4 and record every change in SQL.

    The steps are:
    1. Download all events between 2010-01-01 and one year from now.
    2. Load the known uids and the latest changeset per entry from SQL.
    3. Work out which events are new, changed or no longer present.
    4. Insert new rows into rapla_entries and changesets into rapla_changes.

    :raise RuntimeError: if no SQL connection could be established
    """
    # Certificate validation is switched off when running on Windows only
    if sys.platform == 'win32':
        http = Http(disable_ssl_certificate_validation=True)
    else:
        http = Http()

    # Get events from RaPla
    events = icalevents.events(url='https://rapla.dhbw-karlsruhe.de/rapla?page=ical&user=eisenbiegler&file=TINF19B4',
                               http=http, start=datetime.strptime('2010-01-01', '%Y-%m-%d'),
                               end=datetime.now() + timedelta(days=365))

    conn = SQLConnectionHandler.getConnection()
    if conn is None:
        # getConnection() logs the pymysql error and returns None on failure
        raise RuntimeError('No SQL connection available, aborting crawl')
    try:
        cur = conn.cursor()
        try:
            uids, changeDict = _loadKnownState(cur)
            _normalizeEvents(events)
            newEvents, updatedEvents, deletedEvents = _diffEvents(events, uids, changeDict)
            _storeChanges(conn, cur, newEvents, updatedEvents, deletedEvents)
        finally:
            cur.close()
    finally:
        # Close the connection even if one of the steps above failed
        conn.close()
def removeTimezone(date: datetime) -> datetime:
    """
    Removes the timezone part of a datetime object

    The wall-clock time is kept as it is (there is no conversion into another
    timezone) and microseconds are dropped, so the result has second precision.
    None is passed through unchanged so that optional timestamps can be handed
    in without a prior check.

    :param date: The datetime object to adjust (may be None)
    :return: The adjusted, timezone-less object or None if date was None
    """
    if date is None:
        return None
    if not isinstance(date, datetime):
        # A plain date has no time part: use midnight of that day
        return datetime(date.year, date.month, date.day)
    return date.replace(tzinfo=None, microsecond=0)
# Run a single crawl when this file is executed as a script (not when it is imported)
if __name__ == "__main__":
    crawl()

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
pymysql
git+git://github.com/irgangla/icalevents@master#egg=icalevents