""" cksgen -- Generate and compare MD5 checksum lists with Python ============================================================= Author: Helmut Kaczmarek "] for line1 in checksums1: checksum1, file1 = line1.split('\t') for line2 in checksums2: checksum2, file2 = line2.split('\t') if file1 == file2 and checksum1 != checksum2: different_files.append(file1) break return different_files if __name__ == "__main__": parser = argparse.ArgumentParser(description="Generate checksums for files.") parser.add_argument("-conf", "--config", type=str, help="Configuration file name without extension") args = parser.parse_args() if args.config: config_filename = args.config + ".conf" config = load_config(config_filename) conf_name = config.get('conf_name') allowed_extensions = config.get('allowed_extensions').split(',') data_directory = config.get('data_directory') files_to_keep = int(config.get('files_to_keep')) script_directory = os.path.dirname(os.path.abspath(__file__)) lists_directory = os.path.join(script_directory, conf_name, 'Lists') logs_directory = os.path.join(script_directory, conf_name, 'Logs') os.makedirs(lists_directory, exist_ok=True) os.makedirs(logs_directory, exist_ok=True) current_timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') current_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') checksum_filename = os.path.join(lists_directory, f'{current_timestamp}_checksum.txt') log_filename = os.path.join(logs_directory, f'{current_timestamp}_log.txt') def scan_directory(directory): with open(checksum_filename, 'a') as f: f.write(f"MD5 checksums on: {current_datetime}\n") for root, _, files in os.walk(directory): for file in files: file_path = os.path.join(root, file) extension = file_path.split('.')[-1].lower() if extension in allowed_extensions: md5_checksum = hashlib.md5(open(file_path, 'rb').read()).hexdigest() with open(checksum_filename, 'a') as f: f.write(f'{md5_checksum}\t{file_path}\n') print(f'Processing {file_path}') scan_directory(data_directory) log_entry = f'MD5 checksums have been created and stored in {checksum_filename}.\n' with open(log_filename, 'a') as f: f.write(log_entry) delete_old_files(lists_directory, files_to_keep) delete_old_files(logs_directory, files_to_keep) last_checksum_files = glob.glob(os.path.join(lists_directory, '*_checksum.txt')) if len(last_checksum_files) >= 2: last_checksum_files.sort(reverse=True) last_checksum_file1 = last_checksum_files[0] last_checksum_file2 = last_checksum_files[1] different_files = compare_checksum_files(last_checksum_file1, last_checksum_file2) log_message = '' if different_files: print('ATTENTION: Different MD5 checksums found! See log file in', log_filename) log_message += 'ATTENTION: The following files have different checksums:\n' for file in different_files: log_message += file + '\n' else: log_message += 'INFO: No different MD5 checksums found.\n' print('INFO: No different MD5 checksums found.') with open(log_filename, 'a') as f: f.write(log_message) elif len(last_checksum_files) == 1: with open(log_filename, 'a') as f: f.write("INFO: Checksums could not be compared because there is currently only one checksum file.\n") print("INFO: Checksums could not be compared because there is currently only one checksum file.") print(log_entry)