관리-도구

편집 파일: aws_lambda.py

import sys
from copy import deepcopy
from datetime import datetime, timedelta
from os import environ

from sentry_sdk.api import continue_trace
from sentry_sdk.consts import OP
from sentry_sdk.hub import Hub, _should_send_default_pii
from sentry_sdk.tracing import TRANSACTION_SOURCE_COMPONENT
from sentry_sdk.utils import (
    AnnotatedValue,
    capture_internal_exceptions,
    event_from_exception,
    logger,
    TimeoutThread,
)
from sentry_sdk.integrations import Integration
from sentry_sdk.integrations._wsgi_common import _filter_headers
from sentry_sdk._compat import reraise
from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import Any
    from typing import TypeVar
    from typing import Callable
    from typing import Optional

from sentry_sdk._types import EventProcessor, Event, Hint

F = TypeVar("F", bound=Callable[..., Any])

# Constants
TIMEOUT_WARNING_BUFFER = 1500  # Buffer time required to send timeout warning to Sentry
MILLIS_TO_SECONDS = 1000.0

def _wrap_init_error(init_error):
    # type: (F) -> F
    def sentry_init_error(*args, **kwargs):
        # type: (*Any, **Any) -> Any

hub = Hub.current
        integration = hub.get_integration(AwsLambdaIntegration)
        if integration is None:
            return init_error(*args, **kwargs)

# If an integration is there, a client has to be there.
        client = hub.client  # type: Any

with capture_internal_exceptions():
            with hub.configure_scope() as scope:
                scope.clear_breadcrumbs()

exc_info = sys.exc_info()
            if exc_info and all(exc_info):
                sentry_event, hint = event_from_exception(
                    exc_info,
                    client_options=client.options,
                    mechanism={"type": "aws_lambda", "handled": False},
                )
                hub.capture_event(sentry_event, hint=hint)

return init_error(*args, **kwargs)

return sentry_init_error  # type: ignore

def _wrap_handler(handler):
    # type: (F) -> F
    def sentry_handler(aws_event, aws_context, *args, **kwargs):
        # type: (Any, Any, *Any, **Any) -> Any

# Per https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html,
        # `event` here is *likely* a dictionary, but also might be a number of
        # other types (str, int, float, None).
        #
        # In some cases, it is a list (if the user is batch-invoking their
        # function, for example), in which case we'll use the first entry as a
        # representative from which to try pulling request data. (Presumably it
        # will be the same for all events in the list, since they're all hitting
        # the lambda in the same request.)

if isinstance(aws_event, list):
            request_data = aws_event[0]
            batch_size = len(aws_event)
        else:
            request_data = aws_event
            batch_size = 1

if not isinstance(request_data, dict):
            # If we're not dealing with a dictionary, we won't be able to get
            # headers, path, http method, etc in any case, so it's fine that
            # this is empty
            request_data = {}

hub = Hub.current
        integration = hub.get_integration(AwsLambdaIntegration)
        if integration is None:
            return handler(aws_event, aws_context, *args, **kwargs)

# If an integration is there, a client has to be there.
        client = hub.client  # type: Any
        configured_time = aws_context.get_remaining_time_in_millis()

with hub.push_scope() as scope:
            timeout_thread = None
            with capture_internal_exceptions():
                scope.clear_breadcrumbs()
                scope.add_event_processor(
                    _make_request_event_processor(
                        request_data, aws_context, configured_time
                    )
                )
                scope.set_tag(
                    "aws_region", aws_context.invoked_function_arn.split(":")[3]
                )
                if batch_size > 1:
                    scope.set_tag("batch_request", True)
                    scope.set_tag("batch_size", batch_size)

# Starting the Timeout thread only if the configured time is greater than Timeout warning
                # buffer and timeout_warning parameter is set True.
                if (
                    integration.timeout_warning
                    and configured_time > TIMEOUT_WARNING_BUFFER
                ):
                    waiting_time = (
                        configured_time - TIMEOUT_WARNING_BUFFER
                    ) / MILLIS_TO_SECONDS

timeout_thread = TimeoutThread(
                        waiting_time,
                        configured_time / MILLIS_TO_SECONDS,
                    )

# Starting the thread to raise timeout warning exception
                    timeout_thread.start()

headers = request_data.get("headers")
            # AWS Service may set an explicit `{headers: None}`, we can't rely on `.get()`'s default.
            if headers is None:
                headers = {}

transaction = continue_trace(
                headers,
                op=OP.FUNCTION_AWS,
                name=aws_context.function_name,
                source=TRANSACTION_SOURCE_COMPONENT,
            )
            with hub.start_transaction(
                transaction,
                custom_sampling_context={
                    "aws_event": aws_event,
                    "aws_context": aws_context,
                },
            ):
                try:
                    return handler(aws_event, aws_context, *args, **kwargs)
                except Exception:
                    exc_info = sys.exc_info()
                    sentry_event, hint = event_from_exception(
                        exc_info,
                        client_options=client.options,
                        mechanism={"type": "aws_lambda", "handled": False},
                    )
                    hub.capture_event(sentry_event, hint=hint)
                    reraise(*exc_info)
                finally:
                    if timeout_thread:
                        timeout_thread.stop()

return sentry_handler  # type: ignore

def _drain_queue():
    # type: () -> None
    with capture_internal_exceptions():
        hub = Hub.current
        integration = hub.get_integration(AwsLambdaIntegration)
        if integration is not None:
            # Flush out the event queue before AWS kills the
            # process.
            hub.flush()

class AwsLambdaIntegration(Integration):
    identifier = "aws_lambda"

def __init__(self, timeout_warning=False):
        # type: (bool) -> None
        self.timeout_warning = timeout_warning

@staticmethod
    def setup_once():
        # type: () -> None

lambda_bootstrap = get_lambda_bootstrap()
        if not lambda_bootstrap:
            logger.warning(
                "Not running in AWS Lambda environment, "
                "AwsLambdaIntegration disabled (could not find bootstrap module)"
            )
            return

if not hasattr(lambda_bootstrap, "handle_event_request"):
            logger.warning(
                "Not running in AWS Lambda environment, "
                "AwsLambdaIntegration disabled (could not find handle_event_request)"
            )
            return

pre_37 = hasattr(lambda_bootstrap, "handle_http_request")  # Python 3.6 or 2.7

if pre_37:
            old_handle_event_request = lambda_bootstrap.handle_event_request

def sentry_handle_event_request(request_handler, *args, **kwargs):
                # type: (Any, *Any, **Any) -> Any
                request_handler = _wrap_handler(request_handler)
                return old_handle_event_request(request_handler, *args, **kwargs)

lambda_bootstrap.handle_event_request = sentry_handle_event_request

old_handle_http_request = lambda_bootstrap.handle_http_request

def sentry_handle_http_request(request_handler, *args, **kwargs):
                # type: (Any, *Any, **Any) -> Any
                request_handler = _wrap_handler(request_handler)
                return old_handle_http_request(request_handler, *args, **kwargs)

lambda_bootstrap.handle_http_request = sentry_handle_http_request

# Patch to_json to drain the queue. This should work even when the
            # SDK is initialized inside of the handler

old_to_json = lambda_bootstrap.to_json

def sentry_to_json(*args, **kwargs):
                # type: (*Any, **Any) -> Any
                _drain_queue()
                return old_to_json(*args, **kwargs)

lambda_bootstrap.to_json = sentry_to_json
        else:
            lambda_bootstrap.LambdaRuntimeClient.post_init_error = _wrap_init_error(
                lambda_bootstrap.LambdaRuntimeClient.post_init_error
            )

old_handle_event_request = lambda_bootstrap.handle_event_request

def sentry_handle_event_request(  # type: ignore
                lambda_runtime_client, request_handler, *args, **kwargs
            ):
                request_handler = _wrap_handler(request_handler)
                return old_handle_event_request(
                    lambda_runtime_client, request_handler, *args, **kwargs
                )

lambda_bootstrap.handle_event_request = sentry_handle_event_request

# Patch the runtime client to drain the queue. This should work
            # even when the SDK is initialized inside of the handler

def _wrap_post_function(f):
                # type: (F) -> F
                def inner(*args, **kwargs):
                    # type: (*Any, **Any) -> Any
                    _drain_queue()
                    return f(*args, **kwargs)

return inner  # type: ignore

lambda_bootstrap.LambdaRuntimeClient.post_invocation_result = (
                _wrap_post_function(
                    lambda_bootstrap.LambdaRuntimeClient.post_invocation_result
                )
            )
            lambda_bootstrap.LambdaRuntimeClient.post_invocation_error = (
                _wrap_post_function(
                    lambda_bootstrap.LambdaRuntimeClient.post_invocation_error
                )
            )

def get_lambda_bootstrap():
    # type: () -> Optional[Any]

# Python 2.7: Everything is in `__main__`.
    #
    # Python 3.7: If the bootstrap module is *already imported*, it is the
    # one we actually want to use (no idea what's in __main__)
    #
    # Python 3.8: bootstrap is also importable, but will be the same file
    # as __main__ imported under a different name:
    #
    #     sys.modules['__main__'].__file__ == sys.modules['bootstrap'].__file__
    #     sys.modules['__main__'] is not sys.modules['bootstrap']
    #
    # Python 3.9: bootstrap is in __main__.awslambdaricmain
    #
    # On container builds using the `aws-lambda-python-runtime-interface-client`
    # (awslamdaric) module, bootstrap is located in sys.modules['__main__'].bootstrap
    #
    # Such a setup would then make all monkeypatches useless.
    if "bootstrap" in sys.modules:
        return sys.modules["bootstrap"]
    elif "__main__" in sys.modules:
        module = sys.modules["__main__"]
        # python3.9 runtime
        if hasattr(module, "awslambdaricmain") and hasattr(
            module.awslambdaricmain, "bootstrap"
        ):
            return module.awslambdaricmain.bootstrap
        elif hasattr(module, "bootstrap"):
            # awslambdaric python module in container builds
            return module.bootstrap

# python3.8 runtime
        return module
    else:
        return None

def _make_request_event_processor(aws_event, aws_context, configured_timeout):
    # type: (Any, Any, Any) -> EventProcessor
    start_time = datetime.utcnow()

def event_processor(sentry_event, hint, start_time=start_time):
        # type: (Event, Hint, datetime) -> Optional[Event]
        remaining_time_in_milis = aws_context.get_remaining_time_in_millis()
        exec_duration = configured_timeout - remaining_time_in_milis

extra = sentry_event.setdefault("extra", {})
        extra["lambda"] = {
            "function_name": aws_context.function_name,
            "function_version": aws_context.function_version,
            "invoked_function_arn": aws_context.invoked_function_arn,
            "aws_request_id": aws_context.aws_request_id,
            "execution_duration_in_millis": exec_duration,
            "remaining_time_in_millis": remaining_time_in_milis,
        }

extra["cloudwatch logs"] = {
            "url": _get_cloudwatch_logs_url(aws_context, start_time),
            "log_group": aws_context.log_group_name,
            "log_stream": aws_context.log_stream_name,
        }

request = sentry_event.get("request", {})

if "httpMethod" in aws_event:
            request["method"] = aws_event["httpMethod"]

request["url"] = _get_url(aws_event, aws_context)

if "queryStringParameters" in aws_event:
            request["query_string"] = aws_event["queryStringParameters"]

if "headers" in aws_event:
            request["headers"] = _filter_headers(aws_event["headers"])

if _should_send_default_pii():
            user_info = sentry_event.setdefault("user", {})

identity = aws_event.get("identity")
            if identity is None:
                identity = {}

id = identity.get("userArn")
            if id is not None:
                user_info.setdefault("id", id)

ip = identity.get("sourceIp")
            if ip is not None:
                user_info.setdefault("ip_address", ip)

if "body" in aws_event:
                request["data"] = aws_event.get("body", "")
        else:
            if aws_event.get("body", None):
                # Unfortunately couldn't find a way to get structured body from AWS
                # event. Meaning every body is unstructured to us.
                request["data"] = AnnotatedValue.removed_because_raw_data()

sentry_event["request"] = deepcopy(request)

return sentry_event

return event_processor

def _get_url(aws_event, aws_context):
    # type: (Any, Any) -> str
    path = aws_event.get("path", None)

headers = aws_event.get("headers")
    if headers is None:
        headers = {}

host = headers.get("Host", None)
    proto = headers.get("X-Forwarded-Proto", None)
    if proto and host and path:
        return "{}://{}{}".format(proto, host, path)
    return "awslambda:///{}".format(aws_context.function_name)

def _get_cloudwatch_logs_url(aws_context, start_time):
    # type: (Any, datetime) -> str
    """
    Generates a CloudWatchLogs console URL based on the context object

Arguments:
        aws_context {Any} -- context from lambda handler

Returns:
        str -- AWS Console URL to logs.
    """
    formatstring = "%Y-%m-%dT%H:%M:%SZ"
    region = environ.get("AWS_REGION", "")

url = (
        "https://console.{domain}/cloudwatch/home?region={region}"
        "#logEventViewer:group={log_group};stream={log_stream}"
        ";start={start_time};end={end_time}"
    ).format(
        domain="amazonaws.cn" if region.startswith("cn-") else "aws.amazon.com",
        region=region,
        log_group=aws_context.log_group_name,
        log_stream=aws_context.log_stream_name,
        start_time=(start_time - timedelta(seconds=1)).strftime(formatstring),
        end_time=(datetime.utcnow() + timedelta(seconds=2)).strftime(formatstring),
    )

return url