#! /usr/bin/python3

"""
Migrate existing build results for a given project and all of its CoprDirs
from one storage (Copr backend) to another (Pulp).
"""

import os
import sys
import argparse
import logging
import time
from copr.v3 import Client, CoprRequestException
from copr_common.log import setup_script_logger
from copr_backend.helpers import BackendConfigReader
from copr_backend.constants import PULP_REDIRECT_FILE
from copr_backend.storage import PulpStorage
from copr_backend.frontend import FrontendClient
from copr_backend.exceptions import FrontendClientException


# Storage names accepted by the --src and --dst command-line options
STORAGES = ["backend", "pulp"]

# Module-level logger; main() hands it over to setup_script_logger()
log = logging.getLogger(__name__)


def get_arg_parser():
    """
    Build the command-line interface of this script

    Both `--src` and `--dst` are mandatory and restricted to `STORAGES`.
    Exactly one of `--project` and `--owner` has to be given, and `--delete`
    is an optional flag that is off by default.
    """
    parser = argparse.ArgumentParser()

    # The two storage options differ only in their name and help text
    storage_options = (
        ("--src", "The source storage"),
        ("--dst", "The destination storage"),
    )
    for flag, description in storage_options:
        parser.add_argument(flag, required=True, choices=STORAGES, help=description)

    # What to migrate: a single project, or everything owned by somebody
    what = parser.add_mutually_exclusive_group(required=True)
    what.add_argument(
        "--project",
        help="Full name of the project that is to be migrated",
    )
    what.add_argument("--owner", help="Migrate all projects for this owner")

    # `store_true` already implies a False default
    parser.add_argument(
        "--delete",
        action="store_true",
        help="After migrating the data, remove it from the old storage",
    )
    return parser


def is_valid_build_directory(name):
    """
    Decide whether a directory name looks like a build results directory

    A build directory is named `<build-id>-<anything>`, where the build ID
    is exactly eight ASCII digits, e.g. `00012345-hello`. Repository
    metadata and other auxiliary directories are rejected.

    See the `copr-backend-resultdir-cleaner`. We may want to share the code
    between them.

    :param name: basename of the directory to check
    :return: True if the name has the build directory format, False otherwise
    """
    if name in ("repodata", "devel", "tmp", "cache", "appdata"):
        return False

    if name.startswith(("repodata.old", ".repodata.")):
        return False

    number, separator, _rest = name.partition("-")
    if not separator:
        return False

    # Only plain ASCII digits are accepted. `str.isdigit()` is also true for
    # characters such as "²", which `int()` refuses to parse, and the build
    # ID is converted with `int()` once the directory is accepted.
    return len(number) == 8 and all(c in "0123456789" for c in number)


def change_on_frontend(client, owner, project, storage):
    """
    Ask copr-frontend to record a new storage for this project

    The owner name, project name and storage name are posted to
    `change-storage` through `client.post()`.
    """
    payload = dict(owner=owner, project=project, storage=storage)
    client.post("change-storage", payload)


def add_redirect(fullname):
    """
    Register an HTTP redirect for this project

    The project full name is appended as a new line to `PULP_REDIRECT_FILE`.
    See https://pagure.io/fedora-infra/ansible/blob/main/f/roles/copr/backend/templates/lighttpd/pulp-redirect.lua.j2
    """
    with open(PULP_REDIRECT_FILE, mode="a", encoding="utf-8") as redirects:
        redirects.write("{0}\n".format(fullname))


def change_storage_for_project(project, dst, config):
    """
    Migrate one project

    Walk every CoprDir of the project (the directory named like the project
    and every `<name>:<suffix>` directory) under
    `config.destdir/<ownername>`. For each chroot directory except
    `srpm-builds`, upload the RPMs found in the valid build directories
    through `PulpStorage` and create a repository version containing them.
    Afterwards ask the frontend to switch the project storage to `dst` and
    append the project to the HTTP redirect file.

    Attributes read from `project`: `ownername`, `name`, `full_name` and
    `persistent`. `config.destdir` is read here, and `config` is also passed
    to `PulpStorage` and `FrontendClient`.

    The process is terminated with exit status 1 when a chroot cannot be
    initialized, when a repository version cannot be created, or when the
    frontend request fails with an error that doesn't contain
    "404 NOT FOUND". A failure to write the redirect is only logged.

    NOTE(review): the uploads always go through `PulpStorage`; `dst` is only
    sent to the frontend.
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-nested-blocks
    ownerdir = os.path.join(config.destdir, project.ownername)
    ok = True

    for subproject_entry in os.scandir(ownerdir):
        subproject = subproject_entry.name
        # Only the main project directory and its `name:suffix` CoprDirs
        if not (subproject == project.name or subproject.startswith(project.name + ":")):
            continue

        coprdir = os.path.join(ownerdir, subproject)
        for chroot_entry in os.scandir(coprdir):
            chroot = chroot_entry.name
            if chroot == "srpm-builds":
                continue

            if not chroot_entry.is_dir():
                continue

            chrootdir = os.path.join(coprdir, chroot)
            appstream = None
            devel = None
            storage = PulpStorage(
                project.ownername,
                subproject,
                appstream,
                devel,
                project.persistent,
                config,
                log,
            )

            # TODO Fault-tolerance and data consistency
            # Errors when creating things in Pulp will likely happen
            # (networking issues, unforeseen Pulp validation, etc). We
            # should figure out how to ensure that all RPMs were
            # successfully uploaded, and if not, we know about it.
            #
            # We also need to make sure that no builds, actions, or cron,
            # are currently writing into the results directory. Otherwise
            # we can end up with inconsistent data in Pulp.

            result = storage.init_project(subproject, chroot)
            if not result:
                log.error("Failed to initialize chroot: %s", chroot)
                ok = False
                # This leaves only the chroot loop of the current CoprDir;
                # the remaining CoprDirs are still processed and the failure
                # is reported once all loops are finished.
                break

            # Results of the uploads done so far in this chroot; the values
            # are read below through their "build_id" and "prn" keys
            uploaded = {}

            # We need to sort them alphabetically so that we start uploading
            # from the oldest builds and therefore we upload all RPMs, even
            # if there are NVR duplicates.
            builddirs = sorted(os.scandir(chrootdir), key=lambda x: x.name)
            for builddir_entry in builddirs:
                if not builddir_entry.is_dir():
                    continue

                builddir = builddir_entry.name
                resultdir = os.path.join(chrootdir, builddir)

                if not is_valid_build_directory(builddir):
                    log.info("Skipping: %s", resultdir)
                    continue

                # The directory name starts with the zero-padded build ID
                build_id = int(builddir.split("-")[0])

                to_upload = []
                rpms = storage.find_build_results(resultdir)
                for rpm in rpms:
                    # Was a package with this file name already uploaded?
                    # The lookup is done by basename, not by the `rpm` value
                    # itself.
                    # NOTE(review): this presumes that `rpm` is a full path
                    # and that upload_build_results() returns a dict keyed
                    # by RPM basename - confirm against PulpStorage.
                    basename = os.path.basename(rpm)
                    if basename in uploaded:
                        # If the already uploaded package comes from a newer
                        # build we don't need to bother with uploading this
                        # package
                        if build_id < uploaded[basename]["build_id"]:
                            log.info("Skipping duplicate %s from build %s, "
                                     "a newer build already uploaded",
                                     basename, build_id)
                            continue
                    to_upload.append(rpm)

                # We cannot check return code here
                results = storage.upload_build_results(
                    to_upload,
                    chroot,
                    build_id=build_id,
                )

                # It is possible that we already uploaded a package with the
                # same NEVRA. In such case we will replace it here, and the
                # previously uploaded package won't get into the repository.
                # That doesn't bother us because Pulp will garbage collect and
                # remove it.
                uploaded.update(results)

            # Add build results to the repository
            all_package_prns = [x["prn"] for x in uploaded.values()]
            if not storage.create_repository_version(subproject, chroot, all_package_prns):
                log.error("Failed to create repository version for chroot: %s", chroot)
                # NOTE(review): unlike the init_project() failure above, this
                # aborts the whole process immediately - confirm the
                # difference is intended.
                sys.exit(1)

            log.info("OK: %s", chroot)

    # Not everything was migrated successfully. Play it safe and fail.
    if not ok:
        log.error(
            "Failure during '%s' migration, not switching on frontend",
            project.full_name,
        )
        sys.exit(1)

    # Change storage in the frontend database
    frontend_client = FrontendClient(config, try_indefinitely=False, logger=log)
    try:
        change_on_frontend(frontend_client, project.ownername, project.name, dst)
    except FrontendClientException as ex:
        log.error("Failed to change storage on frontend for %s because: %s",
                  project.full_name, str(ex))
        # If the project was deleted on frontend, we don't mind
        if "404 NOT FOUND" not in str(ex):
            sys.exit(1)

    # At this point all data is successfully migrated and frontend thinks the
    # project is in Pulp, so we can safely add the HTTP redirect
    try:
        add_redirect(project.full_name)
    except OSError as ex:
        # NOTE(review): the error is only logged, so the success message
        # below is printed even when the redirect is missing.
        log.error("Failed to add a redirect for %s because: %s",
                  project.full_name, str(ex))

    log.info("Project %s successfully migrated", project.full_name)


def query_project(owner, name, config):
    """
    Fetch the project from the frontend using the public API client

    The API client is pointed at `config.frontend_base_url`.
    """
    settings = {"copr_url": config.frontend_base_url}
    return Client(settings).project_proxy.get(owner, name)


def _get_chroot_detail(client, project, chroot):
    """
    Fetch details of one project chroot, retrying every 5 seconds for as
    long as the request keeps raising `CoprRequestException`
    """
    while True:
        try:
            return client.project_chroot_proxy.get(
                project.ownername, project.name, chroot
            )
        except CoprRequestException as ex:
            log.warning("Retrying to get chroot %s for %s/%s: %s",
                        chroot, project.ownername, project.name, str(ex))
            time.sleep(5)


def has_comps(project, config):
    """
    Tell whether at least one chroot of the project has comps configured

    The chroots listed in `project.chroot_repos` are queried one by one and
    the search stops at the first one with a non-empty `comps_name`.
    """
    client = Client({"copr_url": config.frontend_base_url})
    return any(
        _get_chroot_detail(client, project, chroot).get("comps_name")
        for chroot in project.chroot_repos
    )


def all_projects_for_owner(owner, config):
    """
    List full names of the owner's projects that are not stored in Pulp yet

    The projects are obtained from the frontend API rather than from the
    directories in `os.path.join(config.destdir, owner)`, because the
    projects that were already migrated to Pulp have to be left out.
    """
    client = Client({"copr_url": config.frontend_base_url})
    return [
        project.full_name
        for project in client.project_proxy.get_list(owner)
        if project.storage != "pulp"
    ]


def handle_project(fullname, config, dst):
    """
    Process a single project migration

    The project is skipped when its full name is malformed, when it already
    uses the `dst` storage, when it has manual createrepo enabled
    (`devel_mode`), or when any of its chroots has comps configured.
    Otherwise it is handed over to `change_storage_for_project()`.

    :param fullname: full project name in the `owner/project` form
    :param config: backend configuration object
    :param dst: name of the destination storage
    :return: 0 when the migration was performed, 1 when the project was
        skipped
    """
    ownername, separator, projectname = fullname.partition("/")
    if not (separator and ownername and projectname):
        # Without this check a name without a slash ends with an unhandled
        # ValueError raised by the tuple unpacking
        log.error("Invalid project name '%s', expected 'owner/project'", fullname)
        return 1

    project = query_project(ownername, projectname, config)
    if project.storage == dst:
        print(
            "The project {0} has already been migrated to {1}"
            .format(fullname, dst)
        )
        return 1

    if project.devel_mode:
        log.error("Skip: project %s uses manual createrepo, see %s",
                  fullname, "https://github.com/fedora-copr/copr/issues/4238")
        return 1

    if has_comps(project, config):
        print("Skipping {0} - comps.xml not supported in PULP".format(fullname))
        return 1

    change_storage_for_project(project, dst, config)
    return 0


def main():
    """
    Entry point: parse the arguments, refuse unsupported combinations and
    migrate either one project or all projects of an owner
    """
    setup_script_logger(log, "/var/log/copr-backend/change-storage.log")
    args = get_arg_parser().parse_args()

    if args.src == args.dst:
        log.info("The source and destination storage is the same, nothing to do.")
        return

    if args.src == "pulp":
        log.error("Migration from pulp to somewhere else is not supported")
        sys.exit(1)

    if args.delete:
        log.error("Data removal is not supported yet")
        sys.exit(1)

    config = BackendConfigReader("/etc/copr/copr-be.conf").read()

    # Single project: its result becomes the exit status of the script
    if args.project:
        sys.exit(handle_project(args.project, config, args.dst))

    if not args.owner:
        log.error("Unexpected choice. This should never happen")
        sys.exit(1)

    # Whole owner: go through the projects one by one and log each outcome
    projects = all_projects_for_owner(args.owner, config)
    total = len(projects)
    for position, fullname in enumerate(projects, start=1):
        print(
            "[{0}/{1}] Migrating {2} to {3}"
            .format(position, total, fullname, args.dst)
        )

        if handle_project(fullname, config, args.dst) == 0:
            status = "Success."
        else:
            status = "Failure."
        log.info("Project migration for %s %s", fullname, status)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C ends the script quietly, without a traceback.
        # NOTE(review): the exit status is then 0 even though the migration
        # was interrupted - confirm this is intended.
        pass
