#!/usr/bin/env python3
"""This file generates the lookup table from mime-db/db.json.

You only need to run this when updating mime-db.
"""

import collections
import io
import json


def assert_boring_ascii(text):
    """Check that it's safe to do code generation with this string.

    The string must be pure ASCII, all lowercase, printable, and must not
    contain a double quote, single quote, backslash, or forward slash.

    Returns:
        None when every check passes.

    Raises:
        UnicodeEncodeError: if ``text`` contains non-ASCII characters.
        AssertionError: if any of the other checks fails.
    """
    # If there's unicode we'll get incorrect offsets
    # If mime-db ever starts containing unicode (unlikely!), process strings
    # with .encode("utf8") first
    text.encode("ascii")

    # Raise explicitly instead of using ``assert`` statements: those are
    # stripped under ``python -O``, which would silently disable the checks.
    if text.lower() != text:
        raise AssertionError(f"string is not lowercase: {text!r}")
    if not text.isprintable():
        raise AssertionError(f"string is not printable: {text!r}")
    for forbidden in ('"', "'", "\\", "/"):
        if forbidden in text:
            raise AssertionError(
                f"string contains forbidden character {forbidden!r}: {text!r}"
            )


# JSON is UTF-8; be explicit rather than relying on the platform default
# encoding, which differs between systems.
with open("mime-db/db.json", encoding="utf-8") as f:
    db = json.load(f)

# Maps top-level type -> {subtype: extension}.
by_type = collections.defaultdict(dict)

# Iterate in sorted order so insertion order (and therefore the order in which
# entries are later emitted) follows the MIME type names.
for mime, info in sorted(db.items()):
    # Skip MIME types that list no extensions (missing key or empty list).
    if extensions := info.get("extensions"):
        type_, subtype = mime.split("/")
        # Only the first listed extension is kept for each MIME type.
        by_type[type_][subtype] = extensions[0]

# raw_data accumulates every subtype immediately followed by its extension,
# with no separators; lookup_text accumulates the generated table whose
# entries record where each pair starts in raw_data and how long each part is.
raw_data = io.StringIO()
lookup_text = io.StringIO()
lookup_text.write(
    """// This file is generated by build.py
// Do not edit manually

&[
"""
)

# Position in raw_data at which the next entry will start. Tracked explicitly
# because tell() on a text stream is documented as an opaque number. Every
# string written is checked to be ASCII, so character counts equal byte counts.
raw_offset = 0

for type_, subtype_extensions in by_type.items():
    assert_boring_ascii(type_)
    lookup_text.write(f"""    ("{type_}", &[\n""")
    for subtype, extension in subtype_extensions.items():
        assert_boring_ascii(subtype)
        assert_boring_ascii(extension)
        # Raised explicitly so the check is not stripped under ``python -O``.
        if "." in extension:
            raise AssertionError(f"extension contains a dot: {extension!r}")
        lookup_text.write(
            f"""        // {subtype}: {extension}
        Entry {{
            location: {raw_offset},
            subtype_len: {len(subtype)},
            extension_len: {len(extension)},
        }},\n"""
        )
        raw_data.write(subtype)
        raw_data.write(extension)
        raw_offset += len(subtype) + len(extension)
    lookup_text.write("    ]),\n")
lookup_text.write("]\n")

# The generated entries record positions within this text, so the bytes on
# disk must match it exactly: write as ASCII (one byte per character, and an
# error instead of silent mis-encoding) and disable newline translation.
with open("src/raw_data", "w", encoding="ascii", newline="") as f:
    f.write(raw_data.getvalue())

# Explicit encoding so the output does not depend on the platform default.
with open("src/lookup", "w", encoding="utf-8") as f:
    f.write(lookup_text.getvalue())
