#
# List CPAN Releases including prerequisites
#
# Download a page for each author plus one for each release from CPAN (a total
# of around 15000), and write the authors, releases, versions, and
# prerequisites to STDOUT in YAML format:
#
# author:
#   Release-Name:
#     :version: "1.2.3"
#     :prereqs:
#       - Prerequisite::Module
#
# Author: Dave Burt <dave at burt.id.au>
# Last modified: 21 Feb 2006
#

require 'open-uri'
require 'yaml'

# Root of the CPAN search site; every request below is built from it.
# NOTE(review): search.cpan.org may since have been retired in favour of
# metacpan.org -- confirm this endpoint still serves the pages scraped here.
BASE_URL = "http://search.cpan.org/".freeze

# Download +url+ and return the response body as a String.
#
# URI.open (supplied by open-uri) is used instead of Kernel#open because
# open-uri no longer hooks Kernel#open on Ruby 3.0 and later, where a bare
# open("http://...") is treated as a file path and fails.
#
# Raises OpenURI::HTTPError when the server answers with an error status.
def fetch_page(url)
  URI.open(url, &:read)
end

# Pull the prerequisite names out of the text of a Makefile.PL.
#
# makefile_source - String holding the contents of a Makefile.PL.
#
# Returns nil when no PREREQ_PM hash is found, otherwise an Array of Strings
# (possibly empty) holding the key of each "key => value" entry. An entry that
# has no "=>" is returned verbatim.
#
# The PREREQ_PM key may be bare, single-quoted or double-quoted, and so may
# the entry keys.
#
# TODO: strip comments, handle qw() and conditionals. The hash body is taken
#       up to the first "}", so nested braces cut the list short.
def extract_prereqs(makefile_source)
  prereq_hash = makefile_source.match(/\bPREREQ_PM['"]?\s*=>\s*\{\s*(.*?)\s*\}/m)
  return nil unless prereq_hash

  names = prereq_hash[1].split(/\s*,\s*/m).map do |entry|
    key = entry.match(/['"]?(.*?)['"]?\s*=>/)
    key ? key[1] : entry
  end
  names.reject(&:empty?)
end

# Start of the YAML document written to STDOUT.
puts "---"

# Walk the author index one initial letter at a time, then every author page,
# then the Makefile.PL of every release linked from that page. Output is
# written as soon as it is known.
("A".."Z").each do |initial|
  author_index = fetch_page("#{BASE_URL}author?#{initial}")

  author_index.scan(%r{href="/~([a-z]+)/"}).flatten.each do |author|
    puts "#{author}:"
    author_page = fetch_page("#{BASE_URL}~#{author}")

    # uniq: only the first occurrence of each release/version pair is listed.
    author_page.scan(%r{href="([-.\w]+)-(v?[.\d]+)/"}).uniq.each do |release, version|
      puts "  \"#{release}\":"
      puts "    :version: \"#{version}\""

      makefile_url = "#{BASE_URL}src/#{author.upcase}/#{release}-#{version}/Makefile.PL"
      begin
        prereqs = extract_prereqs(fetch_page(makefile_url))
      rescue OpenURI::HTTPError
        # Probably Makefile.PL Not Found; list the release without prereqs.
        next
      end

      # No PREREQ_PM hash in the Makefile.PL: leave the :prereqs: key out.
      next if prereqs.nil?

      if prereqs.empty?
        # Emit an explicit empty list rather than a key followed by a blank
        # line, which would load as nil.
        puts "    :prereqs: []"
      else
        puts "    :prereqs:"
        prereqs.each do |name|
          # Escape backslashes and double quotes so the scalar stays valid
          # inside a double-quoted YAML string.
          escaped = name.gsub(/["\\]/) { |ch| "\\#{ch}" }
          puts "      - \"#{escaped}\""
        end
      end
    end
  end
end
