To ensure the data integrity of my photo archive, I wrote a PowerShell script that identifies corrupted .jpg, .jpeg, .dng and .cr2 files by determining their MD5 checksums and comparing them with those from the previous run. In principle, this script can also be used for other file types.
This script is primarily intended for archived data that no longer changes. Of course, you can also use it for data that is edited from time to time. In that case, keep in mind that the MD5 checksum of a file already changes if, for example, the metadata of a JPG file is edited. So the message "ATTENTION: Different MD5 checksums found!" on the command prompt does not necessarily indicate a corrupted file.
Note: There is also a PHP port of this script.
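Each run writes the checksums to a file named yyyyMMdd_HHmmss_checksum.txt.
The result of the comparison is written to a log file named yyyyMMdd_HHmmss_log.txt.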
#######################################################################
## Determine and compare MD5 checksums with PowerShell               ##
## ----------------------------------------------------------------- ##
## Author: Helmut Kaczmarek <email@helmutkaczmarek.de>               ##
## URL: https://wiki.helmutkaczmarek.de/code:powershell:checksum     ##
#######################################################################

# Overview:
#   1. Hash every file below $dataFolder that matches $allowedExtensions and
#      write "<hash> <full path>" lines to a new, timestamped checksum list.
#   2. Keep only the 10 most recent checksum lists.
#   3. Compare the newest list with the one before it (matching entries by
#      file path) and write the result to a timestamped log file.
#   4. Keep only the 10 most recent log files.

# Settings
$dataFolder = "D:\Folder"
$scriptFolder = "D:\Folder\Checksum"
$checksumFolderPath = Join-Path -Path $scriptFolder -ChildPath "Lists"
$logFolderPath = Join-Path -Path $scriptFolder -ChildPath "Logs"
$allowedExtensions = "*.jpg", "*.jpeg", "*.dng", "*.cr2"
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"

# Create folder for checksum files (will be subfolder of $scriptFolder)
if (-not (Test-Path -Path $checksumFolderPath -PathType Container)) {
    New-Item -Path $checksumFolderPath -ItemType Directory
}

# Create folder for log files (will be subfolder of $scriptFolder)
if (-not (Test-Path -Path $logFolderPath -PathType Container)) {
    New-Item -Path $logFolderPath -ItemType Directory
}

# Create the checksum file: one header line, then one "<hash> <path>" line per file
$checksumList = Join-Path -Path $checksumFolderPath -ChildPath ("${timestamp}_checksum.txt")
Add-Content -Path $checksumList -Value "MD5 checksums on: $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')"
Get-ChildItem -Recurse -File -Path $dataFolder -Include $allowedExtensions | ForEach-Object {
    $file = $_.FullName
    # -LiteralPath: file names may contain [ or ], which -Path would treat as wildcards
    $md5 = Get-FileHash -Algorithm MD5 -LiteralPath $file
    $logEntry = "$($md5.Hash) $file"
    Add-Content -Path $checksumList -Value $logEntry
    Write-Host "Processing $file"
}

# Keep the 10 most recent checksum files
$allChecksumFiles = Get-ChildItem -Path $checksumFolderPath -Filter "*_checksum.txt" |
    Sort-Object -Property LastWriteTime -Descending
$oldChecksumFiles = $allChecksumFiles | Select-Object -Skip 10
foreach ($oldFile in $oldChecksumFiles) {
    Remove-Item -Path $oldFile.FullName -Force
}

# Determine the last two checksum files.
# @(...) guarantees an array, so .Count is reliable even for a single file.
$allChecksumFiles = @(Get-ChildItem -Path $checksumFolderPath -Filter "*_checksum.txt" |
    Sort-Object -Property LastWriteTime -Descending)

$logFile = Join-Path -Path $logFolderPath -ChildPath ("${timestamp}_log.txt")

if ($allChecksumFiles.Count -lt 2) {
    Write-Host "INFO: Checksums could not be compared because there is currently only one checksum file."
    Add-Content -Path $logFile -Value "$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss'): INFO: Checksums could not be compared because there is currently only one checksum file."
} else {
    $latestChecksumFile = $allChecksumFiles[0]
    $previousChecksumFile = $allChecksumFiles[1]

    # Read the checksums and filenames from the last two checksum files
    $latestChecksums = Get-Content -Path $latestChecksumFile.FullName
    $previousChecksums = Get-Content -Path $previousChecksumFile.FullName

    # Index the previous run by file path. Matching by path instead of by line
    # number keeps the comparison correct when files were added, removed or
    # renamed between the two runs (which shifts all following lines).
    # Hashtable string keys are compared case-insensitively, like -eq on paths.
    $previousHashByFile = @{}
    foreach ($line in $previousChecksums) {
        if ($line -like 'MD5 checksums on:*') { continue }   # header line
        $previousHash, $previousFile = $line -split ' ', 2
        if ($previousFile) {
            $previousHashByFile[$previousFile] = $previousHash
        }
    }

    # Collect every file that exists in both lists but has a different hash
    $differentFiles = @()
    foreach ($line in $latestChecksums) {
        if ($line -like 'MD5 checksums on:*') { continue }   # header line
        $latestHash, $latestFile = $line -split ' ', 2
        if (-not $latestFile) { continue }
        if ($previousHashByFile.ContainsKey($latestFile) -and
            $latestHash -ne $previousHashByFile[$latestFile]) {
            $differentFiles += $latestFile
        }
    }

    # Create the log file
    Add-Content -Path $logFile -Value "Directory monitoring on: $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss')"
    if ($differentFiles.Count -gt 0) {
        Write-Host "ATTENTION: Different MD5 checksums found!"
        Add-Content -Path $logFile -Value "ATTENTION: The following files have different checksums:"
        foreach ($file in $differentFiles) {
            Add-Content -Path $logFile -Value " $file"
        }
    } else {
        Write-Host "INFO: No different MD5 checksums found."
        Add-Content -Path $logFile -Value "INFO: No different MD5 checksums found."
    }
}

# Keep the 10 most recent log files
$allLogFiles = Get-ChildItem -Path $logFolderPath -Filter "*_log.txt" |
    Sort-Object -Property LastWriteTime -Descending
$oldLogFiles = $allLogFiles | Select-Object -Skip 10
foreach ($oldFile in $oldLogFiles) {
    Remove-Item -Path $oldFile.FullName -Force
}

# Display a message with the path to the created log file in the command prompt
Write-Host "The following log file was created:"
Write-Host $logFile