This script is a PHP port of my PowerShell script for ensuring data integrity. It identifies corrupted .jpg, .jpeg, .dng and .cr2 files by computing their MD5 checksums and comparing them with the checksums from the previous run. The script can also be used for other file types by adjusting $allowedExtensions. If PHP is installed on the local computer or web server, the script can be run directly from the command prompt (cmd.exe, PowerShell, Bash, etc.) with php checksum.php.
This script is primarily intended for archived data that no longer changes. You can also use it for data that is edited from time to time, but keep in mind that the MD5 checksum of a file changes whenever the file is modified, for example when the metadata of a JPG file is edited. The message ATTENTION: Different MD5 checksums found on the command prompt therefore does not necessarily indicate a corrupted file.
The checksums of each run are stored in a file named yyyyMMdd_HHmmss_checksum.txt and the log messages in a file named yyyyMMdd_HHmmss_log.txt
.<?php
/**
 * Determine and compare MD5 checksums with PHP
 *
 * Workflow of one run:
 *   1. Recursively hash every file below $dataDirectory whose extension is
 *      listed in $allowedExtensions and append "<md5>\t<path>" lines to a
 *      timestamped checksum list.
 *   2. Compare the two most recent checksum lists and report every file
 *      whose checksum differs between them.
 *   3. Delete all but the newest $filesToKeep checksum lists and log files.
 *
 * @author Helmut Kaczmarek <email@helmutkaczmarek.de>
 * @link https://wiki.helmutkaczmarek.de/code:php:checksum
 */

// Allowed file extensions (lower case, without the leading dot)
$allowedExtensions = array('jpg', 'jpeg', 'dng', 'cr2');

// Directory containing the files to be examined
$dataDirectory = 'D:\Folder';

// Directory containing this script
$scriptDirectory = 'D:\Folder\Checksum';

// Folder name for checksum files (will be subfolder of $scriptDirectory)
$listsDirectory = $scriptDirectory . DIRECTORY_SEPARATOR . 'Lists';

// Folder name for log files (will be subfolder of $scriptDirectory)
$logsDirectory = $scriptDirectory . DIRECTORY_SEPARATOR . 'Logs';

// Number of checksum files and log files to be kept
$filesToKeep = 10;

// Timezone (otherwise $currentTimestamp will use UTC)
date_default_timezone_set('Europe/Berlin');

// Generate the current timestamp in format YYYYMMDD_HHMMSS
$currentTimestamp = date('Ymd_His');

// Nothing useful can be hashed if the data directory is missing, so stop
// before an empty (and misleading) checksum list and log are produced.
if (!is_dir($dataDirectory)) {
    echo "ERROR: Data directory $dataDirectory does not exist.\n";
    exit(1);
}

// Check if the subfolders exist, otherwise create them. mkdir() is called
// recursively so that a missing $scriptDirectory is created as well; the
// trailing is_dir() tolerates the directory appearing between check and mkdir().
foreach (array($listsDirectory, $logsDirectory) as $requiredDirectory) {
    if (!is_dir($requiredDirectory)
        && !mkdir($requiredDirectory, 0777, true)
        && !is_dir($requiredDirectory)
    ) {
        echo "ERROR: Could not create directory $requiredDirectory.\n";
        exit(1);
    }
}

// File name for the checksum file and log file including time stamp
$checksumFilename = $listsDirectory . DIRECTORY_SEPARATOR . $currentTimestamp . '_checksum.txt';
$logFilename = $logsDirectory . DIRECTORY_SEPARATOR . $currentTimestamp . '_log.txt';

/**
 * Recursively hash all files with an allowed extension below a directory.
 *
 * For every matching file one line "<md5>\t<path>\n" is appended to the
 * checksum file and the path is echoed as progress output. A file that cannot
 * be read is recorded with the placeholder checksum "UNREADABLE" so that the
 * next comparison against an older list flags it.
 *
 * @param string      $directory         Directory to scan.
 * @param array|null  $allowedExtensions Lower-case extensions without dot;
 *                                       null uses the global $allowedExtensions.
 * @param string|null $checksumFilename  Checksum list to append to;
 *                                       null uses the global $checksumFilename.
 * @return void
 */
function scanDirectory($directory, $allowedExtensions = null, $checksumFilename = null)
{
    if ($allowedExtensions === null) {
        $allowedExtensions = $GLOBALS['allowedExtensions'];
    }
    if ($checksumFilename === null) {
        $checksumFilename = $GLOBALS['checksumFilename'];
    }

    $entries = scandir($directory);
    if ($entries === false) {
        // scandir() already emitted a warning; skip this branch of the tree
        // instead of iterating over false.
        echo "WARNING: Could not read directory $directory\n";
        return;
    }

    foreach ($entries as $entry) {
        if ($entry === '.' || $entry === '..') {
            continue;
        }

        $filePath = $directory . DIRECTORY_SEPARATOR . $entry;

        if (is_dir($filePath)) {
            scanDirectory($filePath, $allowedExtensions, $checksumFilename);
            continue;
        }

        $extension = strtolower(pathinfo($entry, PATHINFO_EXTENSION));
        if (!in_array($extension, $allowedExtensions, true)) {
            continue;
        }

        $md5Checksum = md5_file($filePath);
        if ($md5Checksum === false) {
            echo "WARNING: Could not read $filePath\n";
            $md5Checksum = 'UNREADABLE';
        }

        // Write the MD5 checksum to the checksum file
        file_put_contents($checksumFilename, "$md5Checksum\t$filePath\n", FILE_APPEND);

        // Display the current filename on the command prompt
        echo "Processing $filePath\n";
    }
}

/**
 * Delete all but the most recently modified files of a directory.
 *
 * Only regular files matching "*.*" are considered; they are ordered by
 * modification time (newest first) and everything after the first
 * $filesToKeep entries is removed.
 *
 * @param string   $directory   Directory to prune.
 * @param int|null $filesToKeep Number of files to keep; null uses the global
 *                              $filesToKeep.
 * @return void
 */
function deleteOldFiles($directory, $filesToKeep = null)
{
    if ($filesToKeep === null) {
        $filesToKeep = $GLOBALS['filesToKeep'];
    }

    $matches = glob($directory . DIRECTORY_SEPARATOR . '*.*');
    if ($matches === false) {
        return;
    }

    // "*.*" may also match sub-directories, which unlink() cannot remove.
    $allFiles = array_values(array_filter($matches, 'is_file'));
    if (count($allFiles) === 0) {
        return;
    }

    // Sort the paths by modification time in descending order
    $modificationTimes = array_map('filemtime', $allFiles);
    array_multisort($modificationTimes, SORT_DESC, $allFiles);

    // Everything behind the newest $filesToKeep entries is obsolete
    foreach (array_slice($allFiles, max(0, (int) $filesToKeep)) as $obsoleteFile) {
        unlink($obsoleteFile);
    }
}

/**
 * Compare two checksum lists and return the paths whose checksum differs.
 *
 * Both files are expected to contain "<md5>\t<path>" lines. Only paths that
 * occur in both lists are compared; lines without a tab are ignored.
 *
 * @param string $file1 Path of the first checksum list.
 * @param string $file2 Path of the second checksum list.
 * @return array Paths from $file1 whose checksum differs from the one in $file2.
 * @throws RuntimeException If one of the lists cannot be read.
 */
function compareChecksumFiles($file1, $file2)
{
    $lines1 = file($file1, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    $lines2 = file($file2, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

    if ($lines1 === false || $lines2 === false) {
        // An unreadable list must not be reported as "no differences".
        throw new RuntimeException("Could not read checksum files $file1 and/or $file2.");
    }

    // Index the second list by path so that each lookup is O(1).
    $checksumsByPath = array();
    foreach ($lines2 as $line) {
        // Limit 2 keeps paths that themselves contain a tab intact.
        $parts = explode("\t", $line, 2);
        if (count($parts) === 2) {
            $checksumsByPath[$parts[1]] = $parts[0];
        }
    }

    $differentFiles = array();
    foreach ($lines1 as $line) {
        $parts = explode("\t", $line, 2);
        if (count($parts) !== 2) {
            continue;
        }

        list($checksum, $path) = $parts;
        if (isset($checksumsByPath[$path]) && $checksumsByPath[$path] !== $checksum) {
            $differentFiles[] = $path;
        }
    }

    return $differentFiles;
}

// Search the directory recursively for files
scanDirectory($dataDirectory);

// Write log file
$logEntry = "MD5 checksums have been created and stored in $checksumFilename.\n";
file_put_contents($logFilename, $logEntry, FILE_APPEND);

// Paths to the last two checksum files. The file names start with a
// YYYYMMDD_HHMMSS timestamp, so sorting by name is sorting chronologically.
$lastChecksumFiles = glob($listsDirectory . DIRECTORY_SEPARATOR . '*_checksum.txt');
if ($lastChecksumFiles === false) {
    $lastChecksumFiles = array();
}
sort($lastChecksumFiles);

if (count($lastChecksumFiles) >= 2) {
    $lastChecksumFile1 = $lastChecksumFiles[count($lastChecksumFiles) - 1];
    $lastChecksumFile2 = $lastChecksumFiles[count($lastChecksumFiles) - 2];

    // Output and log file
    $logMessage = '';
    try {
        $differentFiles = compareChecksumFiles($lastChecksumFile1, $lastChecksumFile2);

        if (count($differentFiles) > 0) {
            echo "ATTENTION: Different MD5 checksums found! See log file in $logFilename.\n";
            $logMessage .= "ATTENTION: The following files have different checksums:\n";
            foreach ($differentFiles as $file) {
                $logMessage .= "$file\n";
            }
        } else {
            $logMessage .= "INFO: No different MD5 checksums found.\n";
            echo "INFO: No different MD5 checksums found.\n";
        }
    } catch (RuntimeException $exception) {
        $logMessage .= 'ERROR: ' . $exception->getMessage() . "\n";
        echo $logMessage;
    }

    file_put_contents($logFilename, $logMessage, FILE_APPEND);
}

// Delete old checksum files and log files. This happens after the comparison
// so that a small $filesToKeep cannot remove the list that is compared against.
deleteOldFiles($listsDirectory);
deleteOldFiles($logsDirectory);

echo $logEntry;
?>