46 lines
1.5 KiB
Python
46 lines
1.5 KiB
Python
import os
|
|
|
|
def _printable_name(filename):
    """Return *filename* as text that is always encodable as UTF-8.

    Undecodable bytes and lone surrogates are rendered as backslash escapes
    so that reporting a bad name can never itself raise a Unicode error.
    """
    if isinstance(filename, bytes):
        return filename.decode('utf-8', 'backslashreplace')
    return filename.encode('utf-8', 'backslashreplace').decode('utf-8')


def check_utf8_encoding(filenames):
    """
    Check filenames for names that cannot be represented as valid UTF-8.

    A ``str`` filename is valid when it can be encoded as UTF-8; this fails
    for names containing lone surrogates, such as the surrogate-escaped
    names produced when undecodable bytes are decoded with the
    ``surrogateescape`` error handler. A ``bytes`` filename is valid when it
    can be decoded as UTF-8.

    Progress and a final summary are printed to standard output. Names are
    printed in an escaped form so that an invalid name cannot crash the
    report.

    Args:
        filenames (list): Filenames to check, each a ``str`` or ``bytes``.

    Returns:
        dict: Maps each invalid filename (exactly as passed in) to the
        codec's error message. Empty when every filename is valid.
    """
    error_report = {}
    print("--- Starting UTF-8 Encoding Check ---")

    for filename in filenames:
        # Escape up front: the names that fail the check are precisely the
        # ones a strict UTF-8 output stream would refuse to print verbatim.
        shown = _printable_name(filename)
        try:
            if isinstance(filename, bytes):
                # Raw bytes are valid only if they decode as UTF-8.
                filename.decode('utf-8')
            else:
                # Text is valid only if it encodes (no lone surrogates).
                filename.encode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            error_report[filename] = str(e)
            print(f"ERROR: {shown} - Decoding failed: {e}")
        else:
            print(f"SUCCESS: {shown}")

    print("--- Check Complete ---")
    if error_report:
        print("\n--- Summary of Errors ---")
        for filename, error in error_report.items():
            print(f"File: {_printable_name(filename)}\n Error: {error}\n")
    else:
        print("\nAll checked filenames appear to be valid UTF-8.")

    return error_report
|
|
|
|
if __name__ == "__main__":
    import sys

    # The invalid example names below contain lone surrogates, which a strict
    # UTF-8 stdout cannot encode; escape them instead of crashing while the
    # results are printed.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(errors="backslashreplace")

    # Example list of filenames to check. Replace these with your actual directory contents.
    # Note: "\x80" inside a str literal is the code point U+0080, which is
    # perfectly valid UTF-8 once encoded. An undecodable byte 0x80 in a real
    # filename is represented by Python's surrogateescape handler as the lone
    # surrogate "\udc80", so that is what the invalid examples use.
    files_to_check = [
        "valid_file.txt",
        "file_with_bad_byte\udc80.bin",  # undecodable byte 0x80
        "another_valid_one.md",
        "file_with_another_error\udc99.dat",  # undecodable byte 0x99
    ]

    check_utf8_encoding(files_to_check)