# pylint: disable=line-too-long,useless-suppression
# coding=utf-8
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# Code generated by Microsoft (R) Python Code Generator.
# Changes may cause incorrect behavior and will be lost if the code is regenerated.
# --------------------------------------------------------------------------
import os
import pytest
from dotenv import load_dotenv
from devtools_testutils import (
    test_proxy,
    add_general_string_sanitizer,
    add_body_key_sanitizer,
    add_header_regex_sanitizer,
    add_uri_regex_sanitizer,
    add_general_regex_sanitizer,
    set_custom_default_matcher,
)

# Load settings from a local .env file (when one is present) into the process
# environment so the os.environ lookups in the fixtures below can see them.
load_dotenv()

# Sanitized placeholder for blob storage container SAS URLs in test recordings
SANITIZED_CONTAINER_SAS_URL = "https://sanitized.blob.core.windows.net/container?sv=sanitized-sas-token"


@pytest.fixture(scope="session", autouse=True)
def start_proxy(test_proxy):
    """Ensure the test proxy is started for the session.

    The fixture does no work of its own; it exists only to request the
    ``test_proxy`` fixture automatically (``autouse``) once per session.
    """
    return None


@pytest.fixture(scope="session", autouse=True)
def configure_test_proxy_matcher(test_proxy):
    """Install the session-wide default matcher used by the test proxy.

    LRO operations (like begin_analyze) poll status with multiple identical GET
    requests, and the test proxy has to match them in the correct order. To make
    that matching independent of the environment, the default matcher is set to:

    * ignore request bodies (``compare_bodies=False``) -- polling requests
      have no body;
    * leave a fixed set of headers out of matching entirely, because their
      values differ between recording and playback (see the per-header notes
      in the function body).
    """
    ignored_headers = (
        "User-Agent",  # contains Python version, OS version, CI build info
        "x-ms-client-request-id",  # unique per request
        "x-ms-request-id",  # server-generated, varies per call
        "Authorization",  # different auth tokens between environments
        "Content-Length",  # may vary
        "Accept",  # NOTE(review): excluded as well; rationale not recorded here
        "Connection",  # NOTE(review): excluded as well; rationale not recorded here
    )
    # The matcher is given the header names as one comma-separated string.
    header_csv = ",".join(ignored_headers)
    set_custom_default_matcher(compare_bodies=False, excluded_headers=header_csv)


# For security, avoid recording sensitive identity information in recordings
@pytest.fixture(scope="session", autouse=True)
def add_sanitizers(test_proxy):
    """Register the sanitizers that keep secrets and sensitive values out of recordings.

    Sanitizers are registered in a fixed order: identity values, API key,
    endpoint hostnames, headers, cross-resource copy fields, blob storage
    URLs / SAS tokens, the training data prefix, and finally the dynamic
    analyzer IDs that appear in request URIs.
    """
    zero_guid = "00000000-0000-0000-0000-000000000000"

    def scrub_env_value(env_name, replacement, skip_value=None):
        # Register an exact-string sanitizer for the value of ``env_name``.
        # Nothing is registered when the variable is unset/empty, or when it
        # already equals ``skip_value`` (it holds a placeholder, not a real value).
        actual = os.environ.get(env_name, "")
        if not actual or actual == skip_value:
            return
        add_general_string_sanitizer(target=actual, value=replacement)

    # Identity values: string sanitizers are used (safer than regex for exact values).
    # A value equal to the all-zero GUID is treated as "not configured".
    for env_name, replacement in (
        ("CONTENTUNDERSTANDING_SUBSCRIPTION_ID", zero_guid),
        ("CONTENTUNDERSTANDING_TENANT_ID", zero_guid),
        ("CONTENTUNDERSTANDING_CLIENT_ID", zero_guid),
        ("CONTENTUNDERSTANDING_CLIENT_SECRET", "fake-secret"),
    ):
        scrub_env_value(env_name, replacement, skip_value=zero_guid)

    # API key: sanitized whenever the variable is set to a non-empty value
    scrub_env_value("CONTENTUNDERSTANDING_KEY", "fake-api-key")

    # Endpoint URLs (same pattern as the DocumentIntelligence SDK):
    # the hostname part between "//" and ".services.ai.azure.com" is normalized
    # to "Sanitized" so recordings match between recording and playback.
    add_general_regex_sanitizer(
        regex=r"(?<=\/\/)[^/]+(?=\.services\.ai\.azure\.com)",
        value="Sanitized",
    )

    # Operation-Location header (used by LRO polling), so the poller uses the
    # correct endpoint URL during playback.
    # IMPORTANT: this pattern must NOT use a lookahead (?=...): a lookahead does
    # not consume the match, which causes double-domain bugs
    # (e.g., Sanitized.services.ai.azure.com.services.ai.azure.com).
    add_header_regex_sanitizer(
        key="Operation-Location",
        regex=r"https://[a-zA-Z0-9\-]+\.services\.ai\.azure\.com",
        value="https://Sanitized.services.ai.azure.com",
    )

    # Ocp-Apim-Subscription-Key is the header in which the API key is sent
    add_header_regex_sanitizer(key="Ocp-Apim-Subscription-Key", regex=".*", value="fake-api-key")
    # Cookies and access tokens
    add_header_regex_sanitizer(key="Set-Cookie", value="[set-cookie;]")
    add_header_regex_sanitizer(key="Cookie", value="cookie;")
    add_body_key_sanitizer(json_path="$..access_token", value="access_token")

    # Cross-resource copy fields in request bodies. They are required by the
    # grant_copy_authorization and copy_analyzer API calls; sanitizing them lets
    # playback mode use placeholder values. Each field is registered with both
    # a "$." and a "$.." JSON path.
    copy_body_fields = (
        ("$.targetAzureResourceId", "placeholder-target-resource-id"),
        ("$.targetRegion", "placeholder-target-region"),
        ("$..targetAzureResourceId", "placeholder-target-resource-id"),
        ("$..targetRegion", "placeholder-target-region"),
        ("$.sourceAzureResourceId", "placeholder-source-resource-id"),
        ("$.sourceRegion", "placeholder-source-region"),
        ("$..sourceAzureResourceId", "placeholder-source-resource-id"),
        ("$..sourceRegion", "placeholder-source-region"),
    )
    for json_path, placeholder in copy_body_fields:
        add_body_key_sanitizer(json_path=json_path, value=placeholder)

    # Cross-resource copy environment values: real resource IDs and regions are
    # sanitized before being stored in test proxy variables. A variable that
    # already holds its placeholder is left alone.
    for env_name, placeholder in (
        ("CONTENTUNDERSTANDING_SOURCE_RESOURCE_ID", "placeholder-source-resource-id"),
        ("CONTENTUNDERSTANDING_SOURCE_REGION", "placeholder-source-region"),
        ("CONTENTUNDERSTANDING_TARGET_RESOURCE_ID", "placeholder-target-resource-id"),
        ("CONTENTUNDERSTANDING_TARGET_REGION", "placeholder-target-region"),
    ):
        scrub_env_value(env_name, placeholder, skip_value=placeholder)

    # Blob storage URLs and SAS tokens (labeled training data tests): keep real
    # storage account names, container names and SAS tokens out of recordings.
    #
    # 1. Storage account hostname in blob URLs (covers URIs, headers, and bodies)
    #    e.g. https://mystorageaccount.blob.core.windows.net -> https://sanitized.blob.core.windows.net
    add_general_regex_sanitizer(
        value="https://sanitized.blob.core.windows.net",
        regex=r"https://[a-zA-Z0-9\-]+\.blob\.core\.windows\.net",
    )

    # 2. Query string that follows a blob URL path. SAS tokens contain sensitive
    #    signing info, so the whole query string (from "?" up to whitespace or a
    #    quote) is replaced with a fake token.
    add_general_regex_sanitizer(
        value="?sv=sanitized-sas-token",
        regex=r"(?<=\.blob\.core\.windows\.net/[^?\s]{1,200})\?[^\s\"']+",
    )

    # 3. containerUrl field in JSON request/response bodies
    add_body_key_sanitizer(json_path="$..containerUrl", value=SANITIZED_CONTAINER_SAS_URL)

    # 4. Training data prefix (may reveal internal folder structure)
    scrub_env_value(
        "CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX",
        "training_samples/",
        skip_value="training_samples/",
    )

    # Dynamic analyzer IDs, in URLs only.
    # Note: analyzer IDs in response bodies are deliberately left untouched,
    # because tests using variables (like test_sample_grant_copy_auth) need the
    # actual IDs to match the variables. URI sanitization is still needed for
    # consistent URL matching in recordings.
    for analyzer_stem in (
        "test_analyzer_source",
        "test_analyzer_target",
        "test_analyzer",
        "test_receipt_analyzer",
    ):
        add_uri_regex_sanitizer(
            regex=rf"/analyzers/{analyzer_stem}_[a-f0-9]+",
            value=f"/analyzers/{analyzer_stem}_0000000000000000",
        )
