Skip to content

Commit 8d86f40

Browse files
committed
Address Code Review
1 parent 20c6824 commit 8d86f40

5 files changed

Lines changed: 187 additions & 151 deletions

File tree

src/core/src/service_interfaces/CredentialSanitizer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# limitations under the License.
1414
#
1515
# Requires Python 2.7+
16-
import logging
1716
import re
1817

1918

@@ -36,11 +35,10 @@ def sanitize(self, message):
3635
# (1) scheme: https://, http://, or ftp://
3736
# (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters
3837
# (3) password: zero or more non-whitespace, non-slash, non-@ characters
39-
sanitized_message = re.sub(
40-
r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message)
38+
sanitized_message = re.sub(r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message)
4139
self.composite_logger.log_verbose("Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message)))
4240
return sanitized_message
4341
except Exception as error:
44-
self.composite_logger.log_error("Error occurred while sanitizing credentials from message: {0}".format(repr(error)))
42+
self.composite_logger.log_error("Error occurred while sanitizing credentials from message: [Error={0}]".format(repr(error)))
4543
return message
4644

src/core/tests/Test_TelemetryWriter.py

Lines changed: 96 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def test_write_event(self):
7171

7272
self.assertTrue(telemetry_event_counter_in_first_test_event is not None)
7373
self.assertTrue(telemetry_event_counter_in_second_test_event is not None)
74-
self.assertTrue(int(telemetry_event_counter_in_second_test_event) - int(telemetry_event_counter_in_first_test_event) == 1 if telemetry_event_counter_in_first_test_event and telemetry_event_counter_in_second_test_event else False)
74+
self.assertTrue(int(telemetry_event_counter_in_second_test_event) - int(telemetry_event_counter_in_first_test_event) == 1)
7575

7676
def test_write_multiple_events_in_same_file(self):
7777
time_backup = time.time
@@ -312,103 +312,125 @@ def test_write_event_with_buffer_true_and_empty_string_and_then_flush_with_non_e
312312
self.assertTrue(text_found.string.startswith("Message 1"))
313313

314314
# ==================== Unit Tests for Credential Sanitization ====================
315-
def test_sanitize_credentials_from_uri_https_with_credentials_leak(self):
316-
""" Test sanitization of HTTPS URIs with credentials """
317-
# Clear any existing event files before test
315+
# ==================== Helper functions for Credential Sanitization Tests ====================
316+
def _clear_events_folder(self):
317+
"""
318+
Helper method to clear the events folder for sanitization test setup.
319+
Removes all existing JSON event files.
320+
"""
318321
for f in os.listdir(self.runtime.telemetry_writer.events_folder_path):
319322
if f.endswith('.json'):
320323
os.remove(os.path.join(self.runtime.telemetry_writer.events_folder_path, f))
321324

322-
# Verify events folder is empty before test
323-
self.assertTrue(len(os.listdir(self.runtime.telemetry_writer.events_folder_path)) == 0, "Events folder should be empty before writing")
325+
def _read_event_from_file(self, file_index=None, event_index=-1):
326+
"""
327+
Helper method to open and read an event from an event file in the events folder.
328+
Args:
329+
file_index: Index of the event file to read. If None, uses latest file
330+
event_index: Index of the event within the file (default: -1 for last event)
331+
Returns: The parsed event dictionary from the JSON file
332+
"""
333+
event_files = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)]
334+
if not event_files:
335+
raise Exception("No event files found in events folder")
336+
337+
if file_index is None:
338+
event_file_path = os.path.join(self.runtime.telemetry_writer.events_folder_path, event_files[-1])
339+
else:
340+
event_file_path = os.path.join(self.runtime.telemetry_writer.events_folder_path, event_files[file_index])
341+
342+
with open(event_file_path, 'r+') as f:
343+
events = json.load(f)
344+
f.close()
345+
if not events:
346+
raise Exception("No events found in event file")
347+
return events[event_index]
348+
349+
def _get_message_without_tc(self, event):
350+
"""
351+
Helper method to extract the message without the TC (telemetry counter) portion.
352+
Args:
353+
event: The event dictionary
354+
Returns: The message portion before " [TC=" marker
355+
"""
356+
return event["Message"][:event["Message"].rfind(" [TC=")]
357+
358+
def _validate_sanitized_event(self, expected_message, task_name=None, event_index=-1, file_index=None):
359+
"""
360+
Helper method to validate an event's message and task name against expected values.
361+
Args:
362+
expected_message: The expected sanitized message (without TC counter)
363+
task_name: The expected task name (optional validation)
364+
event_index: Index of the event within the file (default: -1 for last event)
365+
file_index: Index of the event file (default: None for latest file)
366+
"""
367+
event = self._read_event_from_file(file_index=file_index, event_index=event_index)
368+
369+
self.assertIsNotNone(event)
370+
message_without_tc = self._get_message_without_tc(event)
371+
self.assertEqual(expected_message, message_without_tc)
372+
if task_name is not None:
373+
self.assertEqual(task_name, event["TaskName"])
374+
375+
# ==================== Credential Sanitization Test Cases ====================
376+
def test_sanitize_credentials_from_uri_https_with_credentials_leak(self):
377+
""" Test sanitization of HTTPS URIs with credentials """
378+
self._clear_events_folder()
379+
self.assertEqual(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]), 0)
324380

325381
message = "Error connecting to https://testuser:TESTTOKEN123456@invalid.repo.example/rpm/repodata/repomd.xml"
382+
expected_message = "Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml"
383+
326384
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
327385

328-
# Verify exactly one event file was created
329-
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
386+
# Validate exactly one event file was created
330387
event_files_count = len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)])
331-
self.assertEqual(event_files_count, 1, "Events folder should contain exactly one event file")
388+
self.assertEqual(event_files_count, 1)
332389

333-
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
334-
events = json.load(f)
335-
self.assertTrue(events is not None)
336-
self.assertEqual(events[-1]["TaskName"], "Test Task")
337-
message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")]
338-
self.assertEqual("Error connecting to https://testuser@invalid.repo.example/rpm/repodata/repomd.xml", message_without_tc)
339-
f.close()
390+
# Validate using helper
391+
self._validate_sanitized_event(expected_message, task_name="Test Task")
340392

341393
def test_sanitize_credentials_from_uri_http_with_credentials_leak(self):
342394
""" Test sanitization of HTTP URIs with credentials """
343395
message = "Connection failed to http://user123:password123@example.com/path"
396+
expected_message = "Connection failed to http://user123@example.com/path"
397+
344398
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
345399

346-
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
347-
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
348-
events = json.load(f)
349-
self.assertTrue(events is not None)
350-
self.assertEqual(events[-1]["TaskName"], "Test Task")
351-
# Verify entire message matches expected output (excluding TC counter)
352-
message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")]
353-
self.assertEqual("Connection failed to http://user123@example.com/path", message_without_tc)
354-
f.close()
400+
self._validate_sanitized_event(expected_message, task_name="Test Task")
355401

356402
def test_sanitize_credentials_multiple_urls_with_credentials_leak(self):
357403
""" Test sanitization with multiple URLs containing credentials """
358404
message = "Failed to fetch from https://user1:pass1@host1.com/api and http://user2:pass2@host2.com/data"
405+
expected_message = "Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data"
406+
359407
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
360408

361-
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
362-
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
363-
events = json.load(f)
364-
self.assertTrue(events is not None)
365-
self.assertEqual(events[-1]["TaskName"], "Test Task")
366-
message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")]
367-
self.assertEqual("Failed to fetch from https://user1@host1.com/api and http://user2@host2.com/data", message_without_tc)
368-
f.close()
409+
self._validate_sanitized_event(expected_message, task_name="Test Task")
369410

370411
def test_sanitize_credentials_with_error_and_no_credentials(self):
371412
""" ERROR with 401 status code from jfrog.io """
372413
message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml"
414+
expected_message = "ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml"
415+
373416
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
374417

375-
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
376-
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
377-
events = json.load(f)
378-
self.assertTrue(events is not None)
379-
self.assertEqual(events[-1]["TaskName"], "Test Task")
380-
# Verify entire message matches expected output (excluding TC counter)
381-
message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")]
382-
self.assertEqual("ERROR: Failed to download metadata for repo 'packages-microsoft-com-prod': Status code: 401 for https://cec-aa.jfrog.io/artifactory/glib-rpm-hel9-lts-microsoft-com/repodata/repomd.xml", message_without_tc)
383-
f.close()
418+
self._validate_sanitized_event(expected_message, task_name="Test Task")
384419

385420
def test_sanitize_credentials_with_error_and_credentials_leak(self):
386421
""" Curl error with buildbot:BuildBotToken credentials """
387422
message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not "
388423
"retrieve mirrorlist https://buildbot:BuildBotToken@mirror.example.com/repodata/repomd.xml")
389-
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
424+
expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not "
425+
"retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml")
390426

391-
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
392-
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
393-
events = json.load(f)
394-
self.assertTrue(events is not None)
395-
self.assertEqual(events[-1]["TaskName"], "Test Task")
396-
# Verify entire message matches expected output (excluding TC counter)
397-
message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")]
398-
expected_message = ("Curl error (6): Couldn't resolve host 'packages.microsoft.com' Could not "
399-
"retrieve mirrorlist https://buildbot@mirror.example.com/repodata/repomd.xml")
400-
self.assertEqual(expected_message, message_without_tc)
401-
f.close()
427+
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
428+
self._validate_sanitized_event(expected_message, task_name="Test Task")
402429

403430
def test_sanitize_credentials_with_credentials_leak(self):
404431
""" ERROR with expired SSL certs and TESTTOKEN123456 """
405-
# Clear any existing event files before test
406-
for f in os.listdir(self.runtime.telemetry_writer.events_folder_path):
407-
if f.endswith('.json'):
408-
os.remove(os.path.join(self.runtime.telemetry_writer.events_folder_path, f))
409-
410-
# Verify events folder is empty before test
411-
self.assertTrue(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]) == 0, "Events folder should be empty before writing")
432+
self._clear_events_folder()
433+
self.assertEqual(len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)]), 0)
412434

413435
message = ("ERROR: Customer environment error (expired SSL certs): "
414436
"Command=sudo yum update -y --disablerepo='*' "
@@ -419,30 +441,22 @@ def test_sanitize_credentials_with_credentials_leak(self):
419441
"for https://testuser:TESTTOKEN123456@packages-microsoft-com-prod/CENTRAL.rpm "
420442
"Error: Failed to download metadata for repo 'packages-microsoft-com-prod': "
421443
"Cannot download repomd.xml: All mirrors were tried")
444+
expected_message = ("ERROR: Customer environment error (expired SSL certs): "
445+
"Command=sudo yum update -y --disablerepo='*' "
446+
"--enablerepo='microsoft' !!Code=11 Out- Updating "
447+
"Subscription Management repositories. "
448+
"Unable to read consumer identity This system is not registered "
449+
"with an entitlement server. Status code: 401 "
450+
"for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm "
451+
"Error: Failed to download metadata for repo 'packages-microsoft-com-prod': "
452+
"Cannot download repomd.xml: All mirrors were tried")
453+
422454
self.runtime.telemetry_writer.write_event(message, Constants.TelemetryEventLevel.Error, "Test Task")
423455

424-
# Verify exactly one event file was created
456+
# Validate exactly one event file was created
425457
event_files_count = len([f for f in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', f)])
426-
self.assertEqual(event_files_count, 1, "Events folder should contain exactly one event file")
427-
428-
latest_event_file = [pos_json for pos_json in os.listdir(self.runtime.telemetry_writer.events_folder_path) if re.search('^[0-9]+.json$', pos_json)][-1]
429-
with open(os.path.join(self.runtime.telemetry_writer.events_folder_path, latest_event_file), 'r+') as f:
430-
events = json.load(f)
431-
self.assertTrue(events is not None)
432-
self.assertEqual(events[-1]["TaskName"], "Test Task")
433-
# Verify entire message matches expected output (excluding TC counter)
434-
message_without_tc = events[-1]["Message"][:events[-1]["Message"].rfind(" [TC=")]
435-
expected_message = ("ERROR: Customer environment error (expired SSL certs): "
436-
"Command=sudo yum update -y --disablerepo='*' "
437-
"--enablerepo='microsoft' !!Code=11 Out- Updating "
438-
"Subscription Management repositories. "
439-
"Unable to read consumer identity This system is not registered "
440-
"with an entitlement server. Status code: 401 "
441-
"for https://testuser@packages-microsoft-com-prod/CENTRAL.rpm "
442-
"Error: Failed to download metadata for repo 'packages-microsoft-com-prod': "
443-
"Cannot download repomd.xml: All mirrors were tried")
444-
self.assertEqual(expected_message, message_without_tc)
445-
f.close()
458+
self.assertEqual(event_files_count, 1)
459+
self._validate_sanitized_event(expected_message, task_name="Test Task")
446460

447461
def test_sanitize_credentials_exception_handling(self):
448462
""" Test exception handling: passing None should return the input unchanged """

src/extension/src/CredentialSanitizer.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,10 @@ def sanitize(self, message):
3636
# (1) scheme: https://, http://, or ftp://
3737
# (2) username: one or more non-whitespace, non-slash, non-colon, non-@ characters
3838
# (3) password: zero or more non-whitespace, non-slash, non-@ characters
39-
sanitized_message = re.sub(
40-
r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message)
41-
self.logger.log_verbose(
42-
"Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message)))
39+
sanitized_message = re.sub(r'(https?://|ftp://)([^:/@\s]+):([^@/\s]*)@',r'\1\2@',message)
40+
self.logger.log_verbose("Message was sanitized to remove sensitive information. [InputMessage={0}][SanitizedMessage={1}]".format(str(message), str(sanitized_message)))
4341
return sanitized_message
4442
except Exception as error:
45-
self.logger.log_error("Error occurred while sanitizing credentials from message: {0}".format(repr(error)))
43+
self.logger.log_error("Error occurred while sanitizing credentials from message: [Error={0}]".format(repr(error)))
4644
return message
4745

src/extension/src/TelemetryWriter.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ def __ensure_message_restriction_compliance(self, full_message):
7474
self.logger.log_telemetry_module_error("Error occurred while formatting message for a telemetry event. [Error={0}]".format(repr(e)))
7575
raise
7676

77-
7877
def __get_agent_supports_telemetry_from_env_var(self):
7978
""" Returns True if the env var AZURE_GUEST_AGENT_EXTENSION_SUPPORTED_FEATURES has a key of
8079
ExtensionTelemetryPipeline in the list. Value of the env var looks like this:

0 commit comments

Comments
 (0)