🎉 Initial commit
commit 430769bd33
README.md | 6 lines | Normal file
@@ -0,0 +1,6 @@
# DHBW-RaPla-Vorratsdatenspeicherung

This project is used to retain ("vorratsdatenspeichern") the lecture schedule of TINF19B4.
To do this, the script is run automatically every hour and stores every change to lectures
in SQL. That way, details of events that have already been deleted from RaPla can still be retrieved, and
we additionally get an overview of all changes.
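Note: the hourly, automated run mentioned above is not part of this commit. On the vServer it would typically be wired up with a cron entry along these lines, where the interpreter and path are assumptions:

0 * * * * /usr/bin/python3 /opt/DHBW-RaPla-Vorratsdatenspeicherung/main.py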
SQLConnectionHandler.py | 32 lines | Normal file
@@ -0,0 +1,32 @@
import pymysql
import os
import logging
from typing import Optional


def getConnection() -> Optional[pymysql.Connection]:
    """
    Get a connection to the SQL database.

    Used both on the vServer and for local testing; the IS_VSERVER
    environment variable selects which set of credentials is used.

    @return: pymysql connection object, or None if the connection fails
    """
    try:
        if os.environ.get('IS_VSERVER') == 'true':
            # Running on the vServer: the database is local
            conn = pymysql.connect(
                user=os.environ['vServer_SQL_User'],
                password=os.environ['vServer_SQL_Password'],
                host='localhost',
                port=3306,
                database='DHBW-RaPla-Vorratsdatenspeicherung'
            )
        else:
            # Local testing: connect to the remote SQL server
            conn = pymysql.connect(
                user=os.environ['PADDY_SQL_USER'],
                password=os.environ['PADDY_SQL_PASSWORD'],
                host=os.environ['SQL_SERVER'],
                port=3306,
                database='DHBW-RaPla-Vorratsdatenspeicherung'
            )

        return conn
    except pymysql.Error as e:
        logging.error('SQL Connection error: %s', e)
        return None
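A caller-side sketch (not part of this commit) of how the None fallback from getConnection() could be handled; main.py currently uses the returned connection without this check:

import SQLConnectionHandler

conn = SQLConnectionHandler.getConnection()
if conn is None:
    # The error was already logged inside getConnection(); abort this run instead of crashing later
    raise SystemExit(1)
with conn.cursor() as cur:
    cur.execute("SELECT COUNT(*) FROM rapla_entries")
    print(cur.fetchone()[0])
conn.close()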
main.py | 209 lines | Normal file
@@ -0,0 +1,209 @@
from collections import ChainMap

from icalevents import icalevents
import sys
from httplib2 import Http
from datetime import datetime, timedelta
import SQLConnectionHandler


def crawl():
    # On Windows, create the HTTP client with SSL certificate validation disabled
    if sys.platform == 'win32':
        http = Http(disable_ssl_certificate_validation=True)
    else:
        http = Http()
    # -------------------------------------------------------------------------
    # Get new events
    # -------------------------------------------------------------------------

    # Get all events from 2010-01-01 up to one year from now from RaPla
    events = icalevents.events(url='https://rapla.dhbw-karlsruhe.de/rapla?page=ical&user=eisenbiegler&file=TINF19B4',
                               http=http, start=datetime.strptime('2010-01-01', '%Y-%m-%d'),
                               end=datetime.now() + timedelta(days=365))
    # -------------------------------------------------------------------------
    # Get latest data from SQL
    # -------------------------------------------------------------------------

    # Select the UIDs of all events already known in SQL
    conn = SQLConnectionHandler.getConnection()
    cur = conn.cursor()
    cur.execute("SELECT uid FROM rapla_entries")
    uids = cur.fetchall()
    uids = list(x[0] for x in uids)  # Otherwise, uids would be a list of tuples

    # Select the latest changeset (highest change_id) for every entry, together with its uid
    cur.execute("""
        WITH summary AS (
            SELECT ch.*,
                   ROW_NUMBER() OVER(PARTITION BY ch.entry_id
                                     ORDER BY ch.change_id DESC) AS rk
            FROM rapla_changes ch)
        SELECT s.*, entr.uid
        FROM summary s
        LEFT OUTER JOIN rapla_entries entr ON s.entry_id = entr.entry_id
        WHERE s.rk = 1;
    """)
    changes = cur.fetchall()
    # -------------------------------------------------------------------------
    # Prepare data
    # -------------------------------------------------------------------------

    # Build a dict with the event uid (column 15 of each row) as key and the whole row as value.
    # Say we have the list of tuples [(1, 'abc'), (2, 'def')] where 'abc' and 'def' are the uids;
    # this would then produce a dict of the form {'abc': (1, 'abc'), 'def': (2, 'def')}.
    # The map call yields one single-entry dict per row and ChainMap combines them into one big dict.
    changeDict = dict(ChainMap(*map(lambda x: {x[15]: x}, changes)))

    # Convert booleans back to boolean values, as they come out of SQL as integers
    for change in changeDict:
        workingList = list(changeDict[change])
        workingList[13] = changeDict[change][13] == 1
        workingList[3] = changeDict[change][3] == 1
        changeDict[change] = tuple(workingList)
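    # (Sketch, not part of the commit: assuming the uid in column 15 is unique per row, the same
    #  mapping could be built with a plain dict comprehension: {row[15]: row for row in changes}.)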
    newEvents = []
    updatedEvents = []

    # Append number of recurring event to UID so we can tell them apart
    # Also removes the timezone from all the datetime objects
    evtIdx = {}
    for event in events:
        if event.uid not in evtIdx:
            # Event not known yet
            uid = event.uid
            event.uid = uid + '---0'
            evtIdx[uid] = 1
        else:
            uid = event.uid
            event.uid = uid + '---' + str(evtIdx[uid])
            evtIdx[uid] += 1
        # Remove timezones
        event.start = removeTimezone(event.start)
        event.end = removeTimezone(event.end)
        event.last_modified = removeTimezone(event.last_modified)
        event.created = removeTimezone(event.created)
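    # (Sketch, not part of the commit: the suffixing above maps e.g. the raw UID sequence
    #  ['abc', 'abc', 'abc', 'xyz'] to ['abc---0', 'abc---1', 'abc---2', 'xyz---0'].)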
    # -------------------------------------------------------------------------
    # Check for changes
    # -------------------------------------------------------------------------

    for event in events:
        if event.uid not in uids:
            # New event, create event entry and new changeset
            if event.uid not in list(x.get('uid') for x in newEvents):
                # Only add to list if this event is not in the list yet (can happen in case of recurring events)
                evt = {
                    # TODO add checks for existing values
                    "uid": event.uid,
                    "isDeleted": False,
                    "new_summary": event.summary,
                    "new_description": event.description,
                    "new_start": event.start,
                    "new_end": event.end,
                    "new_last_modified": event.last_modified,
                    "new_created": event.created,
                    "new_location": event.location,
                    "new_organizer": event.organizer,
                    "new_categories": event.categories[0] if event.categories else '',
                    "new_recurring": event.recurring
                }
                newEvents.append(evt)
        else:
            # Event is known, create a new changeset if anything changed
            hasChanges = False
            latestKnownState = changeDict.get(event.uid)
            changeSet = latestKnownState[4:14]  # Relevant subset of the latest known state data
            newEventTuple = tuple(
                [event.summary, event.description, event.start, event.end, event.last_modified, event.created,
                 event.location, event.organizer, (event.categories[0] if event.categories else ''), event.recurring,
                 event.uid])

            # Check every value by looping over both the old and new tuples
            for dataIndex in range(0, 10):
                if newEventTuple[dataIndex] != changeSet[dataIndex]:
                    hasChanges = True

            if hasChanges:
                updatedEvents.append(newEventTuple)
    # Now also check for deleted events
    deletedEvents = []
    for uid in uids:
        # We have to check for every known uid whether it still exists.
        # The following condition may be a bit overwhelming, but it basically checks three things:
        # 1. Is the uid in SQL but not in the list of fetched events? -> It has been deleted from RaPla.
        # 2. If there is already a changeset for this event, is the latest known state that it is not deleted?
        # 3. If there is no changeset for it yet, we can't check the latest known state, so we just set it to deleted
        #    -> this basically can't ever happen with real data, but it happened during testing and it doesn't hurt to leave it in here
        if uid not in list(x.uid for x in events) and ((uid in changeDict and not changeDict[uid][3]) or uid not in changeDict):
            # Only insert if there is no 'deleted' record yet
            deletedEvents.append(tuple([uid]))
    # -------------------------------------------------------------------------
    # Write back to SQL
    # -------------------------------------------------------------------------

    # Insert new events into rapla_entries table
    if newEvents:
        # Insert new events if there are any
        cur.executemany("INSERT INTO rapla_entries (uid, initialSummary) VALUES (%s, %s)",
                        list((x.get('uid'), x.get('new_summary')) for x in newEvents))
        conn.commit()
        # ... and create an initial changeset for each of them
        changeData = list(
            (x.get('isDeleted'), x.get('new_summary'), x.get('new_description'), x.get('new_start'), x.get('new_end'),
             x.get('new_last_modified'),
             x.get('new_created'), x.get('new_location'), x.get('new_organizer'), x.get('new_categories'),
             x.get('new_recurring'), x.get('uid')
             ) for x in newEvents)
        changeQuery = """
            INSERT INTO rapla_changes
            (isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
        """
        cur.executemany(changeQuery, changeData)
        conn.commit()

    if updatedEvents:
        # Insert changes to existing events if there are any
        changeQuery = """
            INSERT INTO rapla_changes
            (isDeleted, new_summary, new_description, new_start, new_end, new_last_modified, new_created, new_location, new_organizer, new_categories, new_recurring, entry_id)
            VALUES (False, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, (SELECT entry_id FROM rapla_entries WHERE uid = %s))
        """
        cur.executemany(changeQuery, updatedEvents)
        conn.commit()

    if deletedEvents:
        # Mark events that disappeared from RaPla as deleted
        cur.executemany(
            "INSERT INTO rapla_changes (entry_id, isDeleted) VALUES ((SELECT entry_id FROM rapla_entries WHERE uid = %s), 1)",
            deletedEvents)
        conn.commit()

    cur.close()
    conn.close()
def removeTimezone(date: datetime) -> datetime:
    """
    Removes the timezone part of a datetime object
    :param date: The datetime object to adjust
    :return: The adjusted object
    """
    return datetime.strptime(date.strftime('%Y-%m-%d %H:%M:%S'), '%Y-%m-%d %H:%M:%S')


if __name__ == "__main__":
    crawl()
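A side note on removeTimezone(): formatting and re-parsing drops the timezone and the sub-second part. A more direct variant with the same effect would be the sketch below (not part of the commit):

def removeTimezone(date: datetime) -> datetime:
    # Drop tzinfo and microseconds, matching the strftime/strptime round trip above
    return date.replace(tzinfo=None, microsecond=0)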
requirements.txt | 2 lines | Normal file
@@ -0,0 +1,2 @@
pymysql
git+git://github.com/irgangla/icalevents@master#egg=icalevents
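Note (not part of the commit): GitHub has since disabled the unauthenticated git:// protocol, so installing this dependency today would require the HTTPS form, e.g. git+https://github.com/irgangla/icalevents@master#egg=icalevents.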