#! /usr/bin/perl
#
# $Id: captivemodid-list.pl,v 1.2 2005/12/26 12:16:24 lace Exp $
# Extract the list of files to download from Microsoft.
# Copyright (C) 2005 Jan Kratochvil <project-captive@jankratochvil.net>
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; exactly version 2 of June 1991 is required
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


use strict;
use warnings;

require LWP::Simple;
use URI::Escape;


# Decode the HTML entity "&amp;" to a plain "&" inside the string that the
# argument references; the referenced string is modified in place.
# Dies with "non-amp amp: <string>" if the string holds any "&" that is not
# the start of an "&amp;" entity, so no other entity can pass silently.
sub unamp($)
{
my $ref=shift;

	if ($$ref=~/&(?!amp;)/) {
		die "non-amp amp: $$ref";
		}
	$$ref=~s/&amp;/&/go;
}

# Stage 1: walk the paginated search results and collect the relative URL of
# every product page into @found.

# Prefix put in front of every relative link taken from the site.
my $BASE='http://www.microsoft.com/downloads/';
# Required first command line argument; checked before any progress output or
# network traffic so that a missing argument fails immediately.
my $VALIDATION_CODE=$ARGV[0] or die "ARGV[0]=VALIDATION_CODE";
# Results requested per index page ("nr=" parameter); the same value is used
# to verify that every non-final page is completely filled.
my $PAGE_SIZE=50;

# Total result count as reported by the first index page.
my $found_total;
# Product page URLs in the order they appear in the index.
my @found;
# Number of results collected so far; must match the range each page reports.
my $found_processed=0;
# Relative URL of the index page to fetch next; undef after the last page.
my $next='results.aspx?freetext=&productID=4C937A02-BAE0-4317-A1A9-0C56CD979D05&categoryId=7&period=&sortCriteria=date&nr='.$PAGE_SIZE.'&DisplayLang=en&type=a';

print STDERR "Downloading index: ";
while ($next) {
	my $url=$BASE.$next;
	my $page=LWP::Simple::get($url) or die "Failed to download index page: $url";
	my($page_total,$page_first,$page_last)=($page=~m{<b>(\d+)</b> results found; results <b>(\d+)-(\d+)</b> shown.})
			or die "Result counts not found in index page: $url";
	die "No results reported by index page: $url" if !$page_total;
	# The total must stay the same across all the pages.
	$found_total=$page_total if !$found_total;
	$found_total==$page_total or die "found_total($found_total)!=page_total($page_total)";
	# Each page must continue exactly where the previous one ended.
	$found_processed+1==$page_first or die "(found_processed+1)(".($found_processed+1).")!=page_first($page_first)";
	$page_last>=$page_first or die "page_last($page_last)<page_first($page_first)";
	if ($page_last<$found_total) {
		# Not the last page: it must be full and must link to its successor.
		$page_last==$page_first+$PAGE_SIZE-1 or die "page_last($page_last)!=page_first($page_first)+$PAGE_SIZE-1";
		($next)=($page=~m{<a href="([^"]*)">Next\s+&gt;</a>}) or die "\"Next\" link not found in index page: $url";
		unamp(\$next);
		$next=~/^info.aspx[?]/ or die "Unexpected \"Next\" link: $next";
		}
	else {
		$page_last==$found_total or die "page_last($page_last)!=found_total($found_total)";
		$next=undef();
		}

	# One result per paragraph: a link, optionally followed by the
	# "Genuine Windows download" badge link.
	while ($page=~m{<p><a href="([^"]*)">[^<]*</a>(?:&nbsp;<a href=[^>]*><img[^>]*\balt="Genuine Windows download"[^>]*></a>)?</p>}g) {
		my($url)=($1);
		$found_processed++;
		unamp(\$url);
		push @found,$url;
		}
	print STDERR ".";
	# The number of links scraped must agree with the range the page claims.
	$found_processed==$page_last or die "found_processed(".($found_processed).")!=page_last($page_last)";
	}
print STDERR " found: ".(0+@found)."\n";

# Stage 2: visit every product page collected in @found, once per language the
# page offers, and gather the file URLs it exposes into @download.
# Progress characters written to STDERR:
#   "+" product page fetched
#   "." page with a single download link
#   "_" page with a multi-file list
#   "!" page containing the "regsysNotRegistered" block (nothing collected)
my @download;
# Upper bound of "Object moved" pages followed for one URL; without it a
# redirect cycle on the remote side would loop forever.
my $REDIRECT_MAX=20;
print STDERR "Downloading product pages: ";
for my $found (@found) {
	my $page=LWP::Simple::get($BASE.$found) or die "Failed to download product page: $BASE$found";
	# Capture the <option> list of the language selector, if the page has one:
	# <option value="en" selected>English</option><option value="fr">French</option></select>
	my($lang)=($page=~m{<select name="displaylang" [^>]*>((?:<option [^>]*>[^<>]*</option>)+)</select>});
	print STDERR "+";
	my @stage2;
	if (!$lang) {
		# No language selector: use the product page URL as it is.
		@stage2=$found;
		}
	else {
		# Strip the options one at a time; anything left over afterwards is
		# an option this pattern failed to parse.
		while ($lang=~s{<option value="([^"]*)"(?: selected)?>([^<>]*)</option>}{}) {
			my $short=$1;
			# Build the per-language URL by swapping the "en" language code.
			(my $found_lang=$found)=~s/((?:\b|%26)DisplayLang(?:=|%3d))en\b/$1$short/
					or die "No DisplayLang=en to substitute in: $found";
			push @stage2,$found_lang;
			}
		!$lang or die "Unparsed language options of $found - $lang";
		die "No languages parsed for: $found" if !@stage2;
		}
	for my $stage2 (@stage2) {
		# Use the same (plain or URL-escaped) parameter separator the URL
		# already uses in front of DisplayLang.
		my $delim='&';
		$delim='%26' if $stage2=~/%26DisplayLang/i;
		$stage2.="${delim}Hash=$VALIDATION_CODE";
		$stage2=$BASE.$stage2;
		my $page=LWP::Simple::get($stage2) or die "Failed to download: $stage2";
		# Follow "Object moved" pages by hand until real content arrives.
		my $redirects=0;
		while ($page=~m{<h2>Object moved to <a href="([^"]*)">}) {
			$stage2=$1;
			unamp(\$stage2);
			++$redirects<=$REDIRECT_MAX or die "More than $REDIRECT_MAX redirects, last target: $stage2";
			$page=LWP::Simple::get($stage2) or die "Failed to download: $stage2";
			}
		# A single-file page carries exactly one window.open() download link.
		my $download;
		while ($page=~m{window.open(?:.)'(http://download.microsoft.com/download/[^']*)',null,}g) {
			die "Multiple download links in: $stage2" if $download;
			$download=$1;
			}
		if ($download) {
			push @download,$download;
			print STDERR ".";
			}
		# NOTE(review): no /s modifier here, so the whole table has to sit on
		# one line of the page for this to match.
		elsif ($page=~m{<table id="multiFileList"(.*?)</table>}) {
			my $files=$1;
			my $download_orig=@download;
			while ($files=~m{<a href="([^"]*)">[^<>]*</a>}g) {
				my $url=$1;
				unamp(\$url);
				push @download,$url;
				}
			# The list must have contributed at least one link.
			die "No links in multiFileList of: $stage2" if $download_orig==@download;
			print STDERR "_";
			}
		elsif ($page=~m{<div id="regsysNotRegistered">}) {
			print STDERR "!";
			}
		else {
			die "Unrecognized page layout: $stage2";
			}
		}
	}
# Stage 3: normalize, de-duplicate and print the collected download URLs.

# Links of the form "info.aspx?...&u=http%3a..." carry the real URL
# URI-escaped in their trailing "u" parameter; replace them by that URL.
# All other entries are left as they are.
for my $link (@download) {
	if ($link=~/^info.aspx[?].*&u=(http%3a[^&]*)$/) {
		$link=uri_unescape $1;
		}
	}
# Drop duplicates through hash keys and put the result in sorted order.
my %seen;
@seen{@download}=();
@download=sort keys %seen;
print STDERR " found downloads: ".(0+@download)."\n";
# The final list goes to STDOUT, one URL per line.
for my $link (@download) {
	print "$link\n";
	}
