From b69ebb7224de81512d0492ab83389319dc43778a Mon Sep 17 00:00:00 2001 From: rads-1996 Date: Tue, 1 Jul 2025 05:26:47 -0700 Subject: [PATCH] Redact specific url query string values and url credentials in instrumentations (#3508) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updated the instrumentation with aiohttp-server tests for url redaction * Updated the aiohttp-server implementation and the query redaction logic * Updated changelog and moved change to unreleased. Updated test files with license header * Improved formatting * Fixed failing tests * Fixed ruff * Update util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py Co-authored-by: Emídio Neto <9735060+emdneto@users.noreply.github.com> --------- Co-authored-by: Emídio Neto <9735060+emdneto@users.noreply.github.com> --- CHANGELOG.md | 5 + .../aiohttp_client/__init__.py | 6 +- .../tests/test_aiohttp_client_integration.py | 10 +- .../aiohttp_server/__init__.py | 5 +- .../tests/test_aiohttp_server_integration.py | 43 ++++++++ .../instrumentation/asgi/__init__.py | 4 +- .../tests/test_asgi_middleware.py | 6 +- .../instrumentation/httpx/__init__.py | 6 +- .../tests/test_httpx_integration.py | 38 +++++-- .../instrumentation/requests/__init__.py | 4 +- .../tests/test_requests_integration.py | 11 ++- .../instrumentation/tornado/client.py | 6 +- .../tests/test_instrumentation.py | 8 +- .../instrumentation/urllib/__init__.py | 4 +- .../tests/test_urllib_integration.py | 7 +- .../instrumentation/wsgi/__init__.py | 6 +- .../tests/test_wsgi_middleware.py | 5 +- .../src/opentelemetry/util/http/__init__.py | 63 +++++++++--- .../tests/test_redact_query_parameters.py | 98 +++++++++++++++++++ .../tests/test_redact_url.py | 64 ++++++++++++ .../tests/test_remove_credentials.py | 27 ++++- 21 files changed, 363 insertions(+), 63 deletions(-) create mode 100644 util/opentelemetry-util-http/tests/test_redact_query_parameters.py create mode 100644 util/opentelemetry-util-http/tests/test_redact_url.py diff --git a/CHANGELOG.md b/CHANGELOG.md index afde4eb7c..9a22fdfff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `opentelemetry-resource-detector-containerid`: make it more quiet on platforms without cgroups ([#3579](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3579)) +### Added + +- `opentelemetry-util-http` Added support for redacting specific url query string values and url credentials in instrumentations + ([#3508](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3508)) + ## Version 1.34.0/0.55b0 (2025-06-04) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py b/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py index 7bcf9fa1b..bee39d13b 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-client/src/opentelemetry/instrumentation/aiohttp_client/__init__.py @@ -135,7 +135,7 @@ from opentelemetry.semconv.metrics.http_metrics import ( ) from opentelemetry.trace import Span, SpanKind, TracerProvider, get_tracer from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import remove_url_credentials, sanitize_method +from opentelemetry.util.http import redact_url, sanitize_method _UrlFilterT = typing.Optional[typing.Callable[[yarl.URL], str]] _RequestHookT = typing.Optional[ @@ -311,9 +311,9 @@ def create_trace_config( method = params.method request_span_name = _get_span_name(method) request_url = ( - remove_url_credentials(trace_config_ctx.url_filter(params.url)) + redact_url(trace_config_ctx.url_filter(params.url)) if callable(trace_config_ctx.url_filter) - else remove_url_credentials(str(params.url)) + else redact_url(str(params.url)) ) span_attributes = {} diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py b/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py index 042f4502b..ec608e6a6 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-client/tests/test_aiohttp_client_integration.py @@ -762,16 +762,16 @@ class TestAioHttpIntegration(TestBase): ) self.memory_exporter.clear() - def test_credential_removal(self): + def test_remove_sensitive_params(self): trace_configs = [aiohttp_client.create_trace_config()] - app = HttpServerMock("test_credential_removal") + app = HttpServerMock("test_remove_sensitive_params") @app.route("/status/200") def index(): return "hello" - url = "http://username:password@localhost:5000/status/200" + url = "http://username:password@localhost:5000/status/200?Signature=secret" with app.run("localhost", 5000): with self.subTest(url=url): @@ -793,7 +793,9 @@ class TestAioHttpIntegration(TestBase): (StatusCode.UNSET, None), { HTTP_METHOD: "GET", - HTTP_URL: ("http://localhost:5000/status/200"), + HTTP_URL: ( + "http://REDACTED:REDACTED@localhost:5000/status/200?Signature=REDACTED" + ), HTTP_STATUS_CODE: int(HTTPStatus.OK), }, ) diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py b/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py index 56dc0a4c2..1074298fc 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-server/src/opentelemetry/instrumentation/aiohttp_server/__init__.py @@ -72,7 +72,7 @@ from opentelemetry.semconv._incubating.attributes.net_attributes import ( ) from opentelemetry.semconv.metrics import MetricInstruments from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import get_excluded_urls, remove_url_credentials +from opentelemetry.util.http import get_excluded_urls, redact_url _duration_attrs = [ HTTP_METHOD, @@ -148,6 +148,7 @@ def collect_request_attributes(request: web.Request) -> Dict: request.url.port, str(request.url), ) + query_string = request.query_string if query_string and http_url: if isinstance(query_string, bytes): @@ -161,7 +162,7 @@ def collect_request_attributes(request: web.Request) -> Dict: HTTP_ROUTE: _get_view_func(request), HTTP_FLAVOR: f"{request.version.major}.{request.version.minor}", HTTP_TARGET: request.path, - HTTP_URL: remove_url_credentials(http_url), + HTTP_URL: redact_url(http_url), } http_method = request.method diff --git a/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py b/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py index 2e0c2e735..b85348c18 100644 --- a/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py +++ b/instrumentation/opentelemetry-instrumentation-aiohttp-server/tests/test_aiohttp_server_integration.py @@ -152,3 +152,46 @@ async def test_suppress_instrumentation( await client.get("/test-path") assert len(memory_exporter.get_finished_spans()) == 0 + + +@pytest.mark.asyncio +async def test_remove_sensitive_params(tracer, aiohttp_server): + """Test that sensitive information in URLs is properly redacted.""" + _, memory_exporter = tracer + + # Set up instrumentation + AioHttpServerInstrumentor().instrument() + + # Create app with test route + app = aiohttp.web.Application() + + async def handler(request): + return aiohttp.web.Response(text="hello") + + app.router.add_get("/status/200", handler) + + # Start the server + server = await aiohttp_server(app) + + # Make request with sensitive data in URL + url = f"http://username:password@{server.host}:{server.port}/status/200?Signature=secret" + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + assert response.status == 200 + assert await response.text() == "hello" + + # Verify redaction in span attributes + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + + span = spans[0] + assert span.attributes[HTTP_METHOD] == "GET" + assert span.attributes[HTTP_STATUS_CODE] == 200 + assert ( + span.attributes[HTTP_URL] + == f"http://{server.host}:{server.port}/status/200?Signature=REDACTED" + ) + + # Clean up + AioHttpServerInstrumentor().uninstrument() + memory_exporter.clear() diff --git a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py index a7f4fc731..814d4c5a2 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py @@ -278,7 +278,7 @@ from opentelemetry.util.http import ( get_custom_headers, normalise_request_header_name, normalise_response_header_name, - remove_url_credentials, + redact_url, sanitize_method, ) @@ -375,7 +375,7 @@ def collect_request_attributes( if _report_old(sem_conv_opt_in_mode): _set_http_url( result, - remove_url_credentials(http_url), + redact_url(http_url), _StabilityMode.DEFAULT, ) http_method = scope.get("method", "") diff --git a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py index 95a874bcd..6b63315c8 100644 --- a/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py @@ -1809,12 +1809,14 @@ class TestAsgiAttributes(unittest.TestCase): otel_asgi.set_status_code(self.span, "Invalid Status Code") self.assertEqual(self.span.set_status.call_count, 1) - def test_credential_removal(self): + def test_remove_sensitive_params(self): self.scope["server"] = ("username:password@mock", 80) self.scope["path"] = "/status/200" + self.scope["query_string"] = b"X-Goog-Signature=1234567890" attrs = otel_asgi.collect_request_attributes(self.scope) self.assertEqual( - attrs[SpanAttributes.HTTP_URL], "http://mock/status/200" + attrs[SpanAttributes.HTTP_URL], + "http://REDACTED:REDACTED@mock/status/200?X-Goog-Signature=REDACTED", ) def test_collect_target_attribute_missing(self): diff --git a/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py b/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py index 8c0b71259..d3d73a1e0 100644 --- a/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-httpx/src/opentelemetry/instrumentation/httpx/__init__.py @@ -259,7 +259,7 @@ from opentelemetry.semconv.metrics.http_metrics import ( from opentelemetry.trace import SpanKind, Tracer, TracerProvider, get_tracer from opentelemetry.trace.span import Span from opentelemetry.trace.status import StatusCode -from opentelemetry.util.http import remove_url_credentials, sanitize_method +from opentelemetry.util.http import redact_url, sanitize_method _logger = logging.getLogger(__name__) @@ -313,7 +313,7 @@ def _extract_parameters( # In httpx >= 0.20.0, handle_request receives a Request object request: httpx.Request = args[0] method = request.method.encode() - url = httpx.URL(remove_url_credentials(str(request.url))) + url = httpx.URL(str(request.url)) headers = request.headers stream = request.stream extensions = request.extensions @@ -382,7 +382,7 @@ def _apply_request_client_attributes_to_span( ) # http semconv transition: http.url -> url.full - _set_http_url(span_attributes, str(url), semconv) + _set_http_url(span_attributes, redact_url(str(url)), semconv) # Set HTTP method in metric labels _set_http_method( diff --git a/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py b/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py index e9dcc377b..28185b933 100644 --- a/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py +++ b/instrumentation/opentelemetry-instrumentation-httpx/tests/test_httpx_integration.py @@ -1301,12 +1301,26 @@ class TestSyncIntegration(BaseTestCases.BaseManualTest): self.assert_span(num_spans=1) self.assert_metrics(num_metrics=1) - def test_credential_removal(self): - new_url = "http://username:password@mock/status/200" + def test_remove_sensitive_params(self): + new_url = "http://username:password@mock/status/200?sig=secret" self.perform_request(new_url) span = self.assert_span() - self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL) + actual_url = span.attributes[SpanAttributes.HTTP_URL] + + if "@" in actual_url: + # If credentials are present, they must be redacted + self.assertEqual( + span.attributes[SpanAttributes.HTTP_URL], + "http://REDACTED:REDACTED@mock/status/200?sig=REDACTED", + ) + else: + # If credentials are removed completely, the query string should still be redacted + self.assertIn( + "http://mock/status/200?sig=REDACTED", + actual_url, + f"Basic URL structure is incorrect: {actual_url}", + ) class TestAsyncIntegration(BaseTestCases.BaseManualTest): @@ -1373,12 +1387,24 @@ class TestAsyncIntegration(BaseTestCases.BaseManualTest): self.assert_span(num_spans=2) self.assert_metrics(num_metrics=1) - def test_credential_removal(self): - new_url = "http://username:password@mock/status/200" + def test_remove_sensitive_params(self): + new_url = "http://username:password@mock/status/200?Signature=secret" self.perform_request(new_url) span = self.assert_span() - self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL) + actual_url = span.attributes[SpanAttributes.HTTP_URL] + + if "@" in actual_url: + self.assertEqual( + span.attributes[SpanAttributes.HTTP_URL], + "http://REDACTED:REDACTED@mock/status/200?Signature=REDACTED", + ) + else: + self.assertIn( + "http://mock/status/200?Signature=REDACTED", + actual_url, + f"If credentials are removed, the query string still should be redacted {actual_url}", + ) class TestSyncInstrumentationIntegration(BaseTestCases.BaseInstrumentorTest): diff --git a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py index 9f76b717c..7cfc3a4fe 100644 --- a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py @@ -147,7 +147,7 @@ from opentelemetry.util.http import ( ExcludeList, get_excluded_urls, parse_excluded_urls, - remove_url_credentials, + redact_url, sanitize_method, ) from opentelemetry.util.http.httplib import set_ip_on_next_http_connection @@ -232,7 +232,7 @@ def _instrument( method = request.method span_name = get_default_span_name(method) - url = remove_url_credentials(request.url) + url = redact_url(request.url) span_attributes = {} _set_http_method( diff --git a/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py b/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py index 485091549..ac3d41294 100644 --- a/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py +++ b/instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py @@ -686,12 +686,17 @@ class TestRequestsIntegration(RequestsIntegrationTestBase, TestBase): return requests.get(url, timeout=5) return session.get(url) - def test_credential_removal(self): - new_url = "http://username:password@mock/status/200" + def test_remove_sensitive_params(self): + new_url = ( + "http://username:password@mock/status/200?AWSAccessKeyId=secret" + ) self.perform_request(new_url) span = self.assert_span() - self.assertEqual(span.attributes[HTTP_URL], self.URL) + self.assertEqual( + span.attributes[HTTP_URL], + "http://REDACTED:REDACTED@mock/status/200?AWSAccessKeyId=REDACTED", + ) def test_if_headers_equals_none(self): result = requests.get(self.URL, headers=None, timeout=5) diff --git a/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py b/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py index da8015312..8660181c8 100644 --- a/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py +++ b/instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/client.py @@ -26,7 +26,7 @@ from opentelemetry.semconv._incubating.attributes.http_attributes import ( HTTP_URL, ) from opentelemetry.trace.status import Status, StatusCode -from opentelemetry.util.http import remove_url_credentials +from opentelemetry.util.http import redact_url def _normalize_request(args, kwargs): @@ -79,7 +79,7 @@ def fetch_async( if span.is_recording(): attributes = { - HTTP_URL: remove_url_credentials(request.url), + HTTP_URL: redact_url(request.url), HTTP_METHOD: request.method, } for key, value in attributes.items(): @@ -165,7 +165,7 @@ def _finish_tracing_callback( def _create_metric_attributes(response): metric_attributes = { HTTP_STATUS_CODE: response.code, - HTTP_URL: remove_url_credentials(response.request.url), + HTTP_URL: redact_url(response.request.url), HTTP_METHOD: response.request.method, } diff --git a/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py b/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py index 329b05bc8..ae9f304fb 100644 --- a/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py +++ b/instrumentation/opentelemetry-instrumentation-tornado/tests/test_instrumentation.py @@ -500,8 +500,8 @@ class TestTornadoInstrumentation(TornadoTest, WsgiTestBase): set_global_response_propagator(orig) - def test_credential_removal(self): - app = HttpServerMock("test_credential_removal") + def test_remove_sensitive_params(self): + app = HttpServerMock("test_remove_sensitive_params") @app.route("/status/200") def index(): @@ -509,7 +509,7 @@ class TestTornadoInstrumentation(TornadoTest, WsgiTestBase): with app.run("localhost", 5000): response = self.fetch( - "http://username:password@localhost:5000/status/200" + "http://username:password@localhost:5000/status/200?Signature=secret" ) self.assertEqual(response.code, 200) @@ -522,7 +522,7 @@ class TestTornadoInstrumentation(TornadoTest, WsgiTestBase): self.assertSpanHasAttributes( client, { - HTTP_URL: "http://localhost:5000/status/200", + HTTP_URL: "http://REDACTED:REDACTED@localhost:5000/status/200?Signature=REDACTED", HTTP_METHOD: "GET", HTTP_STATUS_CODE: 200, }, diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index 6da278fdc..6ed83338a 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -136,7 +136,7 @@ from opentelemetry.util.http import ( ExcludeList, get_excluded_urls, parse_excluded_urls, - remove_url_credentials, + redact_url, sanitize_method, ) from opentelemetry.util.types import Attributes @@ -258,7 +258,7 @@ def _instrument( span_name = _get_span_name(method) - url = remove_url_credentials(url) + url = redact_url(url) data = getattr(request, "data", None) request_size = 0 if data is None else len(data) diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index b085cc50d..219a48494 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -512,14 +512,17 @@ class URLLibIntegrationTestBase(abc.ABC): span = self.assert_span() self.assertEqual(span.status.status_code, StatusCode.ERROR) - def test_credential_removal(self): + def test_remove_sensitive_params(self): url = "http://username:password@mock/status/200" with self.assertRaises(Exception): self.perform_request(url) span = self.assert_span() - self.assertEqual(span.attributes[SpanAttributes.HTTP_URL], self.URL) + self.assertEqual( + span.attributes[SpanAttributes.HTTP_URL], + "http://REDACTED:REDACTED@mock/status/200", + ) def test_hooks(self): def request_hook(span, request_obj): diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py index b1ecdf8bc..ecbc25628 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py @@ -274,7 +274,7 @@ from opentelemetry.util.http import ( get_custom_headers, normalise_request_header_name, normalise_response_header_name, - remove_url_credentials, + redact_url, sanitize_method, ) @@ -371,9 +371,7 @@ def collect_request_attributes( else: # old semconv v1.20.0 if _report_old(sem_conv_opt_in_mode): - result[HTTP_URL] = remove_url_credentials( - wsgiref_util.request_uri(environ) - ) + result[HTTP_URL] = redact_url(wsgiref_util.request_uri(environ)) remote_addr = environ.get("REMOTE_ADDR") if remote_addr: diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py index 0cf7d06a0..5a6e2d21f 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py @@ -818,11 +818,12 @@ class TestWsgiAttributes(unittest.TestCase): self.assertEqual(mock_span.is_recording.call_count, 2) self.assertEqual(attrs[HTTP_STATUS_CODE], 404) - def test_credential_removal(self): + def test_remove_sensitive_params(self): self.environ["HTTP_HOST"] = "username:password@mock" self.environ["PATH_INFO"] = "/status/200" + self.environ["QUERY_STRING"] = "sig=secret" expected = { - HTTP_URL: "http://mock/status/200", + HTTP_URL: "http://REDACTED:REDACTED@mock/status/200?sig=REDACTED", NET_HOST_PORT: 80, } self.assertGreaterEqual( diff --git a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py index ae81dea6e..6c1403fc4 100644 --- a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py +++ b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py @@ -20,7 +20,7 @@ from re import IGNORECASE as RE_IGNORECASE from re import compile as re_compile from re import search from typing import Callable, Iterable, overload -from urllib.parse import urlparse, urlunparse +from urllib.parse import parse_qs, urlencode, urlparse, urlunparse from opentelemetry.semconv._incubating.attributes.http_attributes import ( HTTP_FLAVOR, @@ -69,6 +69,8 @@ _active_requests_count_attrs = { HTTP_SERVER_NAME, } +PARAMS_TO_REDACT = ["AWSAccessKeyId", "Signature", "sig", "X-Goog-Signature"] + class ExcludeList: """Class to exclude certain paths (given as a list of regexes) from tracing requests""" @@ -170,23 +172,23 @@ def parse_excluded_urls(excluded_urls: str) -> ExcludeList: def remove_url_credentials(url: str) -> str: - """Given a string url, remove the username and password only if it is a valid url""" - + """Given a string url, replace the username and password with the keyword `REDACTED` only if it is a valid url""" try: parsed = urlparse(url) if all([parsed.scheme, parsed.netloc]): # checks for valid url - parsed_url = urlparse(url) - _, _, netloc = parsed.netloc.rpartition("@") - return urlunparse( - ( - parsed_url.scheme, - netloc, - parsed_url.path, - parsed_url.params, - parsed_url.query, - parsed_url.fragment, + if "@" in parsed.netloc: + _, _, host = parsed.netloc.rpartition("@") + new_netloc = "REDACTED:REDACTED@" + host + return urlunparse( + ( + parsed.scheme, + new_netloc, + parsed.path, + parsed.params, + parsed.query, + parsed.fragment, + ) ) - ) except ValueError: # an unparsable url was passed pass return url @@ -266,3 +268,36 @@ def _parse_url_query(url: str): path = parsed_url.path query_params = parsed_url.query return path, query_params + + +def redact_query_parameters(url: str) -> str: + """Given a string url, redact sensitive query parameter values""" + try: + parsed = urlparse(url) + if not parsed.query: # No query parameters to redact + return url + query_params = parse_qs(parsed.query) + if not any(param in query_params for param in PARAMS_TO_REDACT): + return url + for param in PARAMS_TO_REDACT: + if param in query_params: + query_params[param] = ["REDACTED"] + return urlunparse( + ( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + urlencode(query_params, doseq=True), + parsed.fragment, + ) + ) + except ValueError: # an unparsable url was passed + return url + + +def redact_url(url: str) -> str: + """Redact sensitive data from the URL, including credentials and query parameters.""" + url = remove_url_credentials(url) + url = redact_query_parameters(url) + return url diff --git a/util/opentelemetry-util-http/tests/test_redact_query_parameters.py b/util/opentelemetry-util-http/tests/test_redact_query_parameters.py new file mode 100644 index 000000000..fb49cd82e --- /dev/null +++ b/util/opentelemetry-util-http/tests/test_redact_query_parameters.py @@ -0,0 +1,98 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from opentelemetry.util.http import redact_query_parameters + + +class TestRedactSensitiveInfo(unittest.TestCase): + def test_redact_goog_signature(self): + url = "https://www.example.com/path?color=blue&X-Goog-Signature=secret" + self.assertEqual( + redact_query_parameters(url), + "https://www.example.com/path?color=blue&X-Goog-Signature=REDACTED", + ) + + def test_no_redaction_needed(self): + url = "https://www.example.com/path?color=blue&query=secret" + self.assertEqual( + redact_query_parameters(url), + "https://www.example.com/path?color=blue&query=secret", + ) + + def test_no_query_parameters(self): + url = "https://www.example.com/path" + self.assertEqual( + redact_query_parameters(url), "https://www.example.com/path" + ) + + def test_empty_query_string(self): + url = "https://www.example.com/path?" + self.assertEqual( + redact_query_parameters(url), "https://www.example.com/path?" + ) + + def test_empty_url(self): + url = "" + self.assertEqual(redact_query_parameters(url), "") + + def test_redact_aws_access_key_id(self): + url = "https://www.example.com/path?color=blue&AWSAccessKeyId=secrets" + self.assertEqual( + redact_query_parameters(url), + "https://www.example.com/path?color=blue&AWSAccessKeyId=REDACTED", + ) + + def test_api_key_not_in_redact_list(self): + url = "https://www.example.com/path?api_key=secret%20key&user=john" + self.assertNotEqual( + redact_query_parameters(url), + "https://www.example.com/path?api_key=REDACTED&user=john", + ) + + def test_password_key_not_in_redact_list(self): + url = "https://api.example.com?key=abc&password=123&user=admin" + self.assertNotEqual( + redact_query_parameters(url), + "https://api.example.com?key=REDACTED&password=REDACTED&user=admin", + ) + + def test_url_with_at_symbol_in_path_and_query(self): + url = "https://example.com/p@th?foo=b@r" + self.assertEqual( + redact_query_parameters(url), "https://example.com/p@th?foo=b@r" + ) + + def test_aws_access_key_with_real_format(self): + url = "https://mock.com?AWSAccessKeyId=AKIAIOSFODNN7" + self.assertEqual( + redact_query_parameters(url), + "https://mock.com?AWSAccessKeyId=REDACTED", + ) + + def test_signature_parameter(self): + url = ( + "https://service.com?sig=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0" + ) + self.assertEqual( + redact_query_parameters(url), "https://service.com?sig=REDACTED" + ) + + def test_signature_with_url_encoding(self): + url = "https://service.com?Signature=39Up9jzHkxhuIhFE9594DJxe7w6cIRCg0V6ICGS0%3A377" + self.assertEqual( + redact_query_parameters(url), + "https://service.com?Signature=REDACTED", + ) diff --git a/util/opentelemetry-util-http/tests/test_redact_url.py b/util/opentelemetry-util-http/tests/test_redact_url.py new file mode 100644 index 000000000..a311a90f6 --- /dev/null +++ b/util/opentelemetry-util-http/tests/test_redact_url.py @@ -0,0 +1,64 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from opentelemetry.util.http import redact_url + + +class TestRedactUrl(unittest.TestCase): + def test_redact_both_credentials_and_query_params(self): + """Test URL with both credentials and sensitive query parameters.""" + url = "https://user:password@api.example.com/data?AWSAccessKeyId=AKIAIOSFODNN7&color=blue" + expected = "https://REDACTED:REDACTED@api.example.com/data?AWSAccessKeyId=REDACTED&color=blue" + self.assertEqual(redact_url(url), expected) + + def test_multiple_sensitive_query_params(self): + """Test URL with multiple sensitive query parameters.""" + url = "https://admin:1234@example.com/secure?Signature=abc123&X-Goog-Signature=xyz789&sig=def456" + expected = "https://REDACTED:REDACTED@example.com/secure?Signature=REDACTED&X-Goog-Signature=REDACTED&sig=REDACTED" + self.assertEqual(redact_url(url), expected) + + def test_url_with_special_characters(self): + """Test URL with special characters in both credentials and query parameters.""" + url = "https://user@domain:p@ss!word@api.example.com/path?Signature=s%40me+special%20chars&normal=fine" + expected = "https://REDACTED:REDACTED@api.example.com/path?Signature=REDACTED&normal=fine" + self.assertEqual(redact_url(url), expected) + + def test_edge_cases(self): + """Test unusual URL formats and corner cases.""" + # URL with fragment + url1 = ( + "https://user:pass@api.example.com/data?Signature=secret#section" + ) + self.assertEqual( + redact_url(url1), + "https://REDACTED:REDACTED@api.example.com/data?Signature=REDACTED#section", + ) + + # URL with port number + url2 = ( + "https://user:pass@api.example.com:8443/data?AWSAccessKeyId=secret" + ) + self.assertEqual( + redact_url(url2), + "https://REDACTED:REDACTED@api.example.com:8443/data?AWSAccessKeyId=REDACTED", + ) + + # URL with IP address instead of domain + url3 = "https://user:pass@192.168.1.1/path?X-Goog-Signature=xyz" + self.assertEqual( + redact_url(url3), + "https://REDACTED:REDACTED@192.168.1.1/path?X-Goog-Signature=REDACTED", + ) diff --git a/util/opentelemetry-util-http/tests/test_remove_credentials.py b/util/opentelemetry-util-http/tests/test_remove_credentials.py index 2e50f1495..b58e4b7e0 100644 --- a/util/opentelemetry-util-http/tests/test_remove_credentials.py +++ b/util/opentelemetry-util-http/tests/test_remove_credentials.py @@ -1,3 +1,17 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import unittest from opentelemetry.util.http import remove_url_credentials @@ -5,27 +19,30 @@ from opentelemetry.util.http import remove_url_credentials class TestRemoveUrlCredentials(unittest.TestCase): def test_remove_no_credentials(self): - url = "http://opentelemetry.io:8080/test/path?query=value" + url = "http://mock/status/200/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual(cleaned_url, url) def test_remove_credentials(self): - url = "http://someuser:somepass@opentelemetry.io:8080/test/path?query=value" + url = "http://someuser:somepass@mock/status/200/test/path?sig=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://opentelemetry.io:8080/test/path?query=value" + cleaned_url, + "http://REDACTED:REDACTED@mock/status/200/test/path?sig=value", ) def test_remove_credentials_ipv4_literal(self): url = "http://someuser:somepass@127.0.0.1:8080/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://127.0.0.1:8080/test/path?query=value" + cleaned_url, + "http://REDACTED:REDACTED@127.0.0.1:8080/test/path?query=value", ) def test_remove_credentials_ipv6_literal(self): url = "http://someuser:somepass@[::1]:8080/test/path?query=value" cleaned_url = remove_url_credentials(url) self.assertEqual( - cleaned_url, "http://[::1]:8080/test/path?query=value" + cleaned_url, + "http://REDACTED:REDACTED@[::1]:8080/test/path?query=value", )