PowerShell script to download the XKCD archive

Since I’m a big fan of XKCD (and of PowerShell), I wrote a PowerShell script to download the entire XKCD archive. It’s not the most elegant script, and to save time I re-used the Get-WebFile function from http://poshcode.org/417, but I thought I’d share it broadly for the betterment of geekdom.
The entire script, including the function, is below. If you already have the Get-WebFile function loaded in your profile, you only need the three statements that follow the function, which are just a bunch of HTML-parsing regexes.

function Get-WebFile {
    <#
    .SYNOPSIS
    Downloads a URL to a file, or returns the response body as a string.

    .DESCRIPTION
    Issues a web request for $url. Unless -Passthru is given without a file
    name, the body is written to a file; when no usable file name is supplied
    one is derived from the Content-Disposition header, then from the last
    segment of the response URI, and finally by prompting the user. A bare
    or derived file name is placed in the current file-system location.

    .PARAMETER url
    The address to download. Prompts interactively when omitted.

    .PARAMETER fileName
    Optional target file. A bare name (no directory part) is resolved
    against the current file-system location.

    .PARAMETER Passthru
    Emit the decoded response body as a single string.

    .PARAMETER quiet
    Suppress the progress display (progress is only shown when -Passthru is
    not specified).

    .OUTPUTS
    The response text when -Passthru is set, followed by the file-system
    entry of the saved file when a file was written.
    #>
    param(
        $url = (Read-Host "The URL to download"),
        $fileName = $null,
        [switch]$Passthru,
        [switch]$quiet
    )

    $req = [System.Net.HttpWebRequest]::Create($url)
    $res = $req.GetResponse()
    if ($res -eq $null) {
        # GetResponse() already reported its error; without a response there
        # is nothing to read, so stop instead of cascading null-reference errors.
        return
    }

    $reader = $null
    $writer = $null
    try {
        if ($fileName -and !(Split-Path $fileName)) {
            # Bare file name: anchor it to the current file-system location.
            $fileName = Join-Path (Get-Location -PSProvider "FileSystem") $fileName
        }
        elseif ((!$Passthru -and ($fileName -eq $null)) -or (($fileName -ne $null) -and (Test-Path -PathType "Container" $fileName))) {
            # NOTE(review): when $fileName is an existing directory, the derived
            # name is still joined to the current location, not to that
            # directory. Behavior kept as-is; confirm whether it is intended.

            # First choice: the name advertised by the server.
            [string]$fileName = ([regex]'(?i)filename=(.*)$').Match($res.Headers["Content-Disposition"]).Groups[1].Value
            # Strip surrounding quotes and path separators from the header value.
            $fileName = $fileName.Trim("\/""'")
            if (!$fileName) {
                # Second choice: the last segment of the (possibly redirected) URI.
                $fileName = $res.ResponseUri.Segments[-1]
                $fileName = $fileName.Trim("\/")
                if (!$fileName) {
                    # Last resort: ask the user.
                    $fileName = Read-Host "Please provide a file name"
                }
                $fileName = $fileName.Trim("\/")
                if (!([IO.FileInfo]$fileName).Extension) {
                    # No extension: borrow the MIME subtype, e.g. "text/html" -> ".html".
                    $fileName = $fileName + "." + $res.ContentType.Split(";")[0].Split("/")[1]
                }
            }
            $fileName = Join-Path (Get-Location -PSProvider "FileSystem") $fileName
        }

        if ($Passthru) {
            # The response may omit the charset or name one this runtime does
            # not know; fall back to UTF-8 rather than failing the download.
            $encoding = [System.Text.Encoding]::UTF8
            if ($res.CharacterSet) {
                try {
                    $encoding = [System.Text.Encoding]::GetEncoding($res.CharacterSet)
                }
                catch {
                    $encoding = [System.Text.Encoding]::UTF8
                }
            }
            # Raw bytes are collected and decoded once at the end: decoding each
            # 4 KB chunk separately corrupts multi-byte characters that straddle
            # a chunk boundary, and repeated string concatenation is quadratic.
            $bodyBytes = New-Object System.IO.MemoryStream
            [string]$output = ""
        }

        if ($res.StatusCode -eq 200) {
            # ContentLength is a 64-bit value (-1 when unknown).
            [long]$goal = $res.ContentLength
            $reader = $res.GetResponseStream()
            if ($fileName) {
                $writer = New-Object System.IO.FileStream $fileName, "Create"
            }
            [byte[]]$buffer = New-Object byte[] 4096
            [long]$total = 0
            [int]$count = 0

            while (($count = $reader.Read($buffer, 0, $buffer.Length)) -gt 0) {
                if ($writer -ne $null) {
                    $writer.Write($buffer, 0, $count)
                }
                if ($Passthru) {
                    $bodyBytes.Write($buffer, 0, $count)
                }
                elseif (!$quiet) {
                    $total += $count
                    if ($goal -gt 0) {
                        $percent = [Math]::Min(100, [int](($total / $goal) * 100))
                        Write-Progress "Downloading $url" "Saving $total of $goal" -Id 0 -PercentComplete $percent
                    }
                    else {
                        Write-Progress "Downloading $url" "Saving $total bytes..." -Id 0
                    }
                }
            }

            if (!$Passthru -and !$quiet) {
                # Dismiss the progress bar once the transfer is finished.
                Write-Progress "Downloading $url" "Done" -Id 0 -Completed
            }

            $reader.Close()
            $reader = $null
            if ($writer -ne $null) {
                $writer.Flush()
                $writer.Close()
                $writer = $null
            }

            if ($Passthru) {
                $output = $encoding.GetString($bodyBytes.ToArray())
                $output
            }
        }
    }
    finally {
        # Release the streams and the connection even if the transfer failed.
        if ($writer -ne $null) { $writer.Close() }
        if ($reader -ne $null) { $reader.Close() }
        $res.Close()
    }

    if ($fileName) {
        # Full cmdlet name: the "ls" alias is not defined on non-Windows hosts.
        Get-ChildItem $fileName
    }
}

# Fetch the archive index page as one string.
$archivepage = Get-WebFile http://www.xkcd.com/archive -Passthru

# Each comic is listed as <a href="/NNN/" title="...">...</a>; pull the
# "/NNN/" part out of every such link and turn it into an absolute URL.
$a = [Regex]::Matches($archivepage, '<a href=./\d+.*?\stitle.*?</a>', "IgnoreCase") |
    % { "http://www.xkcd.com" + [Regex]::Match($_.Value, '/[0-9]*/') }

foreach ($i in $a) {
    # Echo the page being processed.
    $i + ":"
    $curcomic = Get-WebFile $i -Passthru
    # Find the comic image URL on the page (http or https).
    $comicurl = [Regex]::Match($curcomic, 'https?://imgs\.xkcd\.com/comics/[a-zA-Z0-9_()-]*(\.jpg|\.png)', "IgnoreCase")
    if ($comicurl.Success) {
        Get-WebFile $comicurl.Value
    }
    else {
        # Some pages carry no .jpg/.png image; skip them instead of
        # calling Get-WebFile with an empty URL.
        Write-Warning "No comic image found on $i"
    }
}

Leave a Reply

Your email address will not be published. Required fields are marked *