from __future__ import annotations

import difflib
import logging
from typing import Any, Dict, Tuple, Union

from pr_agent.algo.git_patch_processing import extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import GithubProvider

OUTPUT_BUFFER_TOKENS = 800
PATCH_EXTRA_LINES = 3


def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHandler) -> str:
    """
    Returns a string with the diff of the PR.
    If needed, applies diff minimization techniques to reduce the number of tokens.
    """
    files = list(git_provider.get_diff_files())

    # get PR languages
    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), files)

    # generate a standard diff string, with patch extension
    patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)

    # if we are under the limit, return the full diff
    if total_tokens + OUTPUT_BUFFER_TOKENS < token_handler.limit:
        return "\n".join(patches_extended)

    # if we are over the limit, start pruning
    patches_compressed = pr_generate_compressed_diff(pr_languages, token_handler)
    return "\n".join(patches_compressed)
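
# Illustrative budget check (hypothetical numbers): with a model limit of, say,
# 4096 tokens, OUTPUT_BUFFER_TOKENS = 800 reserves room for the model's output,
# so get_pr_diff returns the full extended diff only while the prompt plus all
# extended patches stay under 4096 - 800 = 3296 tokens; past that point it
# falls back to pr_generate_compressed_diff.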


def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler) -> Tuple[list, int]:
    """
    Generate a standard diff string, with patch extension.
    """
    total_tokens = token_handler.prompt_tokens  # initial tokens
    patches_extended = []
    for lang in pr_languages:
        for file in lang['files']:
            original_file_content_str = file.base_file
            new_file_content_str = file.head_file
            patch = file.patch

            # handle the case of a large patch that initially was not loaded
            patch = load_large_diff(file, new_file_content_str, original_file_content_str, patch)
            if not patch:
                continue

            # extend each patch with extra lines of context
            extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
            full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n"
            patch_tokens = token_handler.count_tokens(full_extended_patch)
            file.tokens = patch_tokens
            total_tokens += patch_tokens
            patches_extended.append(full_extended_patch)

    return patches_extended, total_tokens


def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> list:
    # Apply diff minimization techniques to reduce the number of tokens:
    # 0. Start from the largest diff patch and move to smaller ones
    # 1. Don't extend context lines around the diff
    # 2. Minimize deleted files
    # 3. Minimize deleted hunks
    # 4. Minimize all remaining files when the token limit is reached
    patches = []
    # sort the files of each language in top_langs by the number of tokens in the diff
    sorted_files = []
    for lang in top_langs:
        sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))

    total_tokens = token_handler.prompt_tokens
    for file in sorted_files:
        original_file_content_str = file.base_file
        new_file_content_str = file.head_file
        patch = file.patch
        patch = load_large_diff(file, new_file_content_str, original_file_content_str, patch)
        if not patch:
            continue

        # remove delete-only hunks
        patch = handle_patch_deletions(patch, original_file_content_str,
                                       new_file_content_str, file.filename)
        new_patch_tokens = token_handler.count_tokens(patch)
        if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2:
            logging.warning(f"File was fully skipped, no more tokens: {file.filename}.")
            continue  # hard stop, no more tokens
        if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS:
            # Current logic is to skip the patch if it's too large
            # TODO: option for alternative logic that removes hunks from the patch to reduce the number of tokens
            #  until we meet the requirements
            if settings.config.verbosity_level >= 2:
                logging.warning(f"Patch too large, minimizing it, {file.filename}")
            patch = "File was modified"
        if patch:
            patch_final = f"## {file.filename}\n\n{patch}\n"
            patches.append(patch_final)
            total_tokens += token_handler.count_tokens(patch_final)
            if settings.config.verbosity_level >= 2:
                logging.info(f"Tokens: {total_tokens}, last filename: {file.filename}")
    return patches


def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
    if not patch:  # TODO: also add a condition on the file extension
        try:
            diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
                                        new_file_content_str.splitlines(keepends=True))
            if settings.config.verbosity_level >= 2:
                logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.")
            patch = ''.join(diff)
        except Exception:
            pass
    return patch
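

# The sketch below is not part of the library: it is a minimal, standalone
# illustration of the difflib fallback that load_large_diff relies on when a
# file arrives without a pre-computed patch. The two file contents are made-up
# examples; real callers pass the PR's base and head file contents instead.
if __name__ == "__main__":
    _old = "def add(a, b):\n    return a + b\n"
    _new = "def add(a, b):\n    # also accept numeric strings\n    return float(a) + float(b)\n"
    _manual_patch = ''.join(difflib.unified_diff(_old.splitlines(keepends=True),
                                                 _new.splitlines(keepends=True)))
    print(_manual_patch)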