"""Crawl the RaPla iCal feed and record new, changed and deleted events in SQL."""

from collections import ChainMap
from datetime import datetime, timedelta
import sys
from typing import Optional

from httplib2 import Http
from icalevents import icalevents

import SQLConnectionHandler

# iCal export that is crawled.
RAPLA_URL = 'https://rapla.dhbw-karlsruhe.de/rapla?page=ical&user=eisenbiegler&file=TINF19B4'

# Positions inside a row of the "latest change per entry" query. The row holds
# the columns of rapla_changes, then the ROW_NUMBER rank, then the entry uid.
# NOTE(review): the numbers are taken over from the original code and depend
# on the column order of rapla_changes - confirm against the schema.
_COL_IS_DELETED = 3
_COL_FIRST_FIELD = 4      # first of the ten compared event fields
_COL_RECURRING = 13       # last of the ten compared event fields
_COL_END_FIELDS = 14      # exclusive end of the compared slice
_COL_UID = 15

_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def crawl():
    """
    Synchronises the SQL tables with the current state of the RaPla calendar.

    Fetches all events from RaPla, compares them with the latest known state
    stored in ``rapla_entries`` / ``rapla_changes`` and writes one changeset
    for every new, modified or deleted event.

    Exits the process with status 1 if fetching the calendar raises a
    ValueError.
    """
    events = _fetchEvents()
    _normaliseEvents(events)

    conn = SQLConnectionHandler.getConnection()
    try:
        cur = conn.cursor()
        try:
            uids, changeDict = _loadKnownState(cur)
            newEvents, updatedEvents = _diffEvents(events, uids, changeDict)
            deletedEvents = _findDeletedEvents(events, uids, changeDict)
            _writeChanges(conn, cur, newEvents, updatedEvents, deletedEvents)
        finally:
            # Release the cursor even if a query or the diffing failed
            cur.close()
    finally:
        conn.close()


# ---------------------------------------------------------------------------
# Get events from RaPla
# ---------------------------------------------------------------------------
def _fetchEvents():
    """
    Downloads all events between 2010-01-01 and one year from today.

    :return: The list of events returned by icalevents
    """
    if sys.platform == 'win32':
        # NOTE(review): certificate validation is switched off on Windows.
        # This allows man-in-the-middle attacks - confirm it is still needed.
        http = Http(disable_ssl_certificate_validation=True)
    else:
        http = Http()

    # The window ends at midnight of the day one year from now
    end = (datetime.now() + timedelta(days=365)).replace(hour=0, minute=0, second=0, microsecond=0)
    try:
        return icalevents.events(url=RAPLA_URL, http=http, start=datetime(2010, 1, 1), end=end)
    except ValueError as error:
        # Value error sometimes happens because of a problem with dateutil and timezones
        print('Could not fetch events from RaPla: ' + str(error), file=sys.stderr)
        sys.exit(1)


# ---------------------------------------------------------------------------
# Get latest data from SQL
# ---------------------------------------------------------------------------
def _loadKnownState(cur):
    """
    Reads the known event uids and the latest changeset of every entry.

    :param cur: An open database cursor
    :return: A tuple ``(uids, changeDict)``. ``uids`` is the list of all uids
             in rapla_entries, ``changeDict`` maps a uid to its latest
             changeset row (as a tuple, with the flag columns as booleans)
    """
    cur.execute("SELECT uid FROM rapla_entries")
    uids = [row[0] for row in cur.fetchall()]  # Otherwise, uids would be a list of tuples

    cur.execute("""
        WITH summary AS (
            SELECT ch.*,
                   ROW_NUMBER() OVER(PARTITION BY ch.entry_id ORDER BY ch.change_id DESC) AS rk
            FROM rapla_changes ch)
        SELECT s.*, entr.uid
        FROM summary s
        LEFT OUTER JOIN rapla_entries entr ON s.entry_id = entr.entry_id
        WHERE s.rk = 1;
    """)
    changes = cur.fetchall()

    # Build a dict with the event uid as key and the whole row as value.
    # Say we have the rows [(1, 'abc'), (2, 'def')] where abc and def are the
    # uids. Then the result is {'abc': (1, 'abc'), 'def': (2, 'def')}.
    changeDict = {}
    for row in changes:
        state = list(row)
        # Change booleans back to boolean values as they come from SQL as integers
        state[_COL_IS_DELETED] = row[_COL_IS_DELETED] == 1
        state[_COL_RECURRING] = row[_COL_RECURRING] == 1
        # setdefault keeps the first row per uid, like the ChainMap lookup used before
        changeDict.setdefault(row[_COL_UID], tuple(state))
    return uids, changeDict


# ---------------------------------------------------------------------------
# Prepare data
# ---------------------------------------------------------------------------
def _normaliseEvents(events):
    """
    Makes event uids unique and strips the timezone from all timestamps.

    Occurrences of a recurring event share one uid, so a running number is
    appended (``<uid>---0``, ``<uid>---1``, ...) to tell them apart. The
    events are modified in place.

    :param events: The events fetched from RaPla
    """
    occurrences = {}
    for event in events:
        uid = event.uid
        index = occurrences.get(uid, 0)
        event.uid = uid + '---' + str(index)
        occurrences[uid] = index + 1

        # Remove timezones
        event.start = removeTimezone(event.start)
        event.end = removeTimezone(event.end)
        event.last_modified = removeTimezone(event.last_modified)
        event.created = removeTimezone(event.created)


# ---------------------------------------------------------------------------
# Check for changes
# ---------------------------------------------------------------------------
def _diffEvents(events, uids, changeDict):
    """
    Splits the fetched events into new ones and ones whose data has changed.

    :param events: The normalised events fetched from RaPla
    :param uids: All uids known in SQL
    :param changeDict: Latest changeset row per uid
    :return: A tuple ``(newEvents, updatedEvents)``. ``newEvents`` is a list
             of dicts, ``updatedEvents`` a list of tuples with the ten event
             fields followed by the uid
    """
    knownUids = set(uids)
    seenNewUids = set()
    newEvents = []
    updatedEvents = []

    for event in events:
        category = event.categories[0] if event.categories else ''

        if event.uid not in knownUids:
            # New event, create event entry and new changeset.
            # Only add it once, even if the same uid shows up again.
            if event.uid in seenNewUids:
                continue
            seenNewUids.add(event.uid)
            newEvents.append({  # TODO add checks for existing values
                "uid": event.uid,
                "isDeleted": False,
                "new_summary": event.summary,
                "new_description": event.description,
                "new_start": event.start,
                "new_end": event.end,
                "new_last_modified": event.last_modified,
                "new_created": event.created,
                "new_location": event.location,
                "new_organizer": event.organizer,
                "new_categories": category,
                "new_recurring": event.recurring,
            })
            continue

        # Event is known, create a new changeset if anything differs
        newEventTuple = (event.summary, event.description, event.start, event.end,
                         event.last_modified, event.created, event.location,
                         event.organizer, category, event.recurring, event.uid)
        latestKnownState = changeDict.get(event.uid)
        if latestKnownState is None:
            # Entry without any changeset: there is nothing to compare with,
            # so record the current state instead of failing on None
            updatedEvents.append(newEventTuple)
            continue

        # Relevant subset of the latest known state data
        changeSet = latestKnownState[_COL_FIRST_FIELD:_COL_END_FIELDS]
        # zip stops after the ten stored fields, so the trailing uid is not compared
        if any(new != old for new, old in zip(newEventTuple, changeSet)):
            updatedEvents.append(newEventTuple)

    return newEvents, updatedEvents


def _findDeletedEvents(events, uids, changeDict):
    """
    Finds known events that are no longer part of the RaPla calendar.

    A uid is reported if it is stored in SQL but missing from the fetched
    events, unless its latest changeset already marks it as deleted. A uid
    without any changeset is reported as well; this should not happen with
    real data but it happened during testing.

    :param events: The normalised events fetched from RaPla
    :param uids: All uids known in SQL
    :param changeDict: Latest changeset row per uid
    :return: A list of one-element tuples ``(uid,)``
    """
    fetchedUids = {event.uid for event in events}
    deletedEvents = []
    for uid in uids:
        if uid in fetchedUids:
            continue
        latestKnownState = changeDict.get(uid)
        # Only insert if there is no 'deleted' record yet
        if latestKnownState is None or not latestKnownState[_COL_IS_DELETED]:
            deletedEvents.append((uid,))
    return deletedEvents


# ---------------------------------------------------------------------------
# Write back to SQL
# ---------------------------------------------------------------------------
def _writeChanges(conn, cur, newEvents, updatedEvents, deletedEvents):
    """
    Inserts the detected changes into rapla_entries and rapla_changes.

    :param conn: The open database connection, used to commit
    :param cur: A cursor of that connection
    :param newEvents: Dicts describing events that are not known yet
    :param updatedEvents: Tuples of the ten event fields followed by the uid
    :param deletedEvents: One-element tuples with the uid of deleted events
    """
    if newEvents:  # Insert new events if there are any
        cur.executemany("INSERT INTO rapla_entries (uid, initialSummary) VALUES (%s, %s)",
                        [(x.get('uid'), x.get('new_summary')) for x in newEvents])
        # The entries have to exist before the changesets can look up their entry_id
        conn.commit()

        changeData = [
            (x.get('isDeleted'), x.get('new_summary'), x.get('new_description'),
             x.get('new_start'), x.get('new_end'), x.get('new_last_modified'),
             x.get('new_created'), x.get('new_location'), x.get('new_organizer'),
             x.get('new_categories'), x.get('new_recurring'), x.get('uid'))
            for x in newEvents]
        changeQuery = """
            INSERT INTO rapla_changes (isDeleted, new_summary, new_description, new_start, new_end,
                                       new_last_modified, new_created, new_location, new_organizer,
                                       new_categories, new_recurring, entry_id)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                    (SELECT entry_id FROM rapla_entries WHERE uid = %s))
        """
        cur.executemany(changeQuery, changeData)
        conn.commit()

    if updatedEvents:  # Insert changes to existing events if there are any
        changeQuery = """
            INSERT INTO rapla_changes (isDeleted, new_summary, new_description, new_start, new_end,
                                       new_last_modified, new_created, new_location, new_organizer,
                                       new_categories, new_recurring, entry_id)
            VALUES (False, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                    (SELECT entry_id FROM rapla_entries WHERE uid = %s))
        """
        cur.executemany(changeQuery, updatedEvents)
        conn.commit()

    if deletedEvents:
        cur.executemany(
            "INSERT INTO rapla_changes (entry_id, isDeleted) VALUES ((SELECT entry_id FROM rapla_entries WHERE uid = %s), 1)",
            deletedEvents)
        conn.commit()


def removeTimezone(date: Optional[datetime]) -> Optional[datetime]:
    """
    Removes the timezone part of a datetime object

    The value is formatted and parsed again with second precision, so
    microseconds are dropped as well.

    :param date: The datetime object to adjust, or None
    :return: The adjusted object, or None if no date was given
    """
    if date is None:
        # Pass a missing timestamp through instead of failing on it
        return None
    return datetime.strptime(date.strftime(_TIMESTAMP_FORMAT), _TIMESTAMP_FORMAT)


if __name__ == "__main__":
    crawl()