#!/usr/bin/env python3

from datetime import datetime
from datetime import timedelta
import urllib.request
import os
import re
import pytz

# Mirror the Dymola reference-result ``.txt`` files of several
# modelica-buildings branches into a local directory tree.
#
# For every branch the directory index page is fetched and parsed.  A file is
# downloaded when it is missing locally, or when the local modification time
# differs from the time shown in the index by a minute or more.  After each
# download the local mtime is set from the server's ``Last-Modified`` header,
# so an unchanged file is skipped on the next run.

# Timestamps shown in the index are interpreted as America/Los_Angeles local
# time; local file mtimes are read back as UTC-aware datetimes.
PST = pytz.timezone('America/Los_Angeles')
UTC = pytz.timezone('UTC')

# Exact table-header row the parser expects to find in the index page.  If it
# is missing, the page layout has probably changed and the regular
# expressions below can no longer be trusted.
INDEX_HEADER = '   <tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>'

# Link to a ``.txt`` file, and the "YYYY-MM-DD HH:MM" cell of the same row.
HREF_RE = re.compile(r'href="([^"]*[.]txt)"')
TIMESTAMP_RE = re.compile(r'<td[^>]*>([0-9][0-9][0-9][0-9]-[0-9: -]*)</td>')

for branch in ["maint_12.x", "maint_11.x", "maint_10.0.x", "master"]:

    baseurl = "https://simulationresearch.lbl.gov/ReferenceResults/modelica-buildings/%s/Dymola/" % branch
    outputPath = "/mnt/ReferenceFiles/Buildings/%s" % branch

    # Close the connection deterministically once the page has been read.
    with urllib.request.urlopen(baseurl) as conn:
        # get_content_charset() returns None when the server declares no
        # charset; decode(None) would raise TypeError, so fall back to UTF-8.
        charset = conn.headers.get_content_charset() or "utf-8"
        contents = conn.read().decode(charset).split("\n")

    if INDEX_HEADER not in contents:
        raise Exception("Webpage index changed? %s" % contents)

    for line in contents:
        # Only rows that start with "<tr>" are considered file entries.
        if not line.startswith("<tr>"):
            continue
        match = HREF_RE.search(line)
        if not match:
            # Row does not link to a .txt file (e.g. parent directory).
            continue
        fileName = match[1].strip()

        match = TIMESTAMP_RE.search(line)
        if not match:
            raise Exception("Could not find timestamp for %s" % line)
        timeStamp = datetime.strptime(match[1].strip(), "%Y-%m-%d %H:%M")
        timeStamp = PST.localize(timeStamp)

        path = '%s/%s' % (outputPath, fileName)
        if os.path.exists(path):
            dt_m = datetime.fromtimestamp(os.path.getmtime(path), UTC)
            # The index only shows minutes while the stored mtime carries
            # seconds, hence the 60 second tolerance.
            if abs(dt_m - timeStamp) < timedelta(seconds=60):
                continue

        print("Downloading %s" % fileName)
        # baseurl already ends with "/", so append the name directly instead
        # of producing a "//" in the request path.
        with urllib.request.urlopen(baseurl + fileName) as conn:
            last_modified = conn.headers['last-modified']
            data = conn.read()

        if last_modified is None:
            # No Last-Modified header: fall back to the index timestamp, which
            # is what the up-to-date check above compares against.
            dt = timeStamp
        else:
            # HTTP dates are expressed in GMT; strptime yields a naive
            # datetime that is then marked as UTC.
            dt = UTC.localize(datetime.strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z'))

        with open(path, "wb") as fout:
            fout.write(data)
        # Stamp the file with the server-side modification time so that the
        # next run can recognise it as current.
        os.utime(path, (dt.timestamp(), dt.timestamp()))
