import os def check_utf8_encoding(filenames): """ Checks a list of filenames to detect potential UTF-8 encoding errors. Args: filenames (list): A list of strings representing filenames to check. Returns: dict: A dictionary where keys are filenames with errors and values are the error messages. """ error_report = {} print("--- Starting UTF-8 Encoding Check ---") for filename in filenames: try: # Attempt to decode the filename as UTF-8 filename.encode('utf-8') # If encode succeeds, it's likely valid UTF-8 print(f"SUCCESS: {filename}") except UnicodeEncodeError as e: # If encoding fails, it indicates invalid UTF-8 sequences error_report[filename] = str(e) print(f"ERROR: {filename} - Decoding failed: {e}") print("--- Check Complete ---") if error_report: print("\n--- Summary of Errors ---") for filename, error in error_report.items(): print(f"File: {filename}\n Error: {error}\n") else: print("\nAll checked filenames appear to be valid UTF-8.") return error_report if __name__ == "__main__": # Example list of filenames to check. Replace these with your actual directory contents. files_to_check = [ "valid_file.txt", "file_with_bad_byte\x80.bin", # Example of a potentially bad filename "another_valid_one.md", "file_with_another_error\x99.dat" ] check_utf8_encoding(files_to_check)