#!/usr/bin/perl -w
#
# Part of the cbm4linux_utils package
# Copyright (C) 2001  Hugo Cornelis <hugo@bbf.uia.ac.be>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with cbm4linux, a linux loadable module to attach a cbm 
# drive to a linux box, available at http://sta.c64.org/ ;
# if not, write to the Free Software Foundation, 
# Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# $Project: cbm4linux_utils $
# $ProjectHeader: cbm4linux_utils 0.5 Thu, 16 Aug 2001 16:16:48 +0200 cbm64 $
# $Id: d64index 1.5 Thu, 16 Aug 2001 16:16:48 +0200 cbm64 $
#


=head1 NAME 

C<d64index> - Index a set of .d64 files, export to an XML-based file.

=head1 SYNOPSIS

  d64index

=head1 DESCRIPTION

Index a set of .d64 files, export to an XML-based file.  images are
supposed to be somewhere in your directory hierarchy underneath your
currently working directory.  Output goes to stdout.  A log of all 
converted images goes to stderr.

After this script apply an XSL stylesheet to convert to html.  The
cbm4linux_utils package comes with a XSL stylesheet 'd64tohtml.xsl'
that can be used with a command line like '$ java
org.apache.xalan.xslt.Process -IN data.xml -XSL d64tohtml.xsl -out
data.html'. Check the samples directory of that package for an
example.

As far as I know this perl script does a good to very good job under
all circumstances.

However a file entry may contain shifted spaces, which normally
terminates the filename.  After these shifted spaces you can give some
info about the file as a comment.  Some guys like e.g. to put ',8,1:'
as a comment.  If your comments contains spaces and then something
that can can match with a filetype, you are in trouble.

For disk images with invalid directories entries, you may get weird
and faulty results.  Further I must admit that I''m not sure if
backspaces are handled correctly.  From what I remember they give
weird output on the cbm64 screen, but you can load the files correctly
if you address them with backspaces in the filename (which is normally
impossible to type, you need some trickery to do so.


This is a sample XML file for one image (which seems to be formatted
very badly :

C<<disk file="side_1.d64" 
	extension="d64" 
	type="side_1.d64: .d64 disk image (CBM64)" >>
C<  <header preamble="0 ." 
		title="qwertyuiopasdf  " 
		extension="s22 a" />>
C<  <file name="boot-hires      " 
		comment="" 
		type=" prg " 
		blocks="0" />>
C<  <file name="h.e.r.o." 
		comment="" 
		type=" prg " 
		blocks="57" />>
C<  <file name="wimi" 
		comment="" 
		type=" prg " 
		blocks="46" />>
C<  <file name="eagle soft" 
		comment="" 
		type=" prg " 
		blocks="40" />>
C<  <file name="defender" 
		comment="" 
		type=" prg " 
		blocks="40" />>
C<  <file name="defender2" 
		comment="" 
		type=" prg " 
		blocks="40" />>
C<  <tail freeblocks="441" />>
C<</disk>>


=head1 SEE ALSO

cbmctrl(1),d64copy(1)

=cut

use strict;

# global return code

my $exit_status = 0 ;

=head1 INTERNALS

Following sections describes internals of this file.  They may or may not be
correct.

=cut



=head2 C<$c1541>

c1541 executable, change to where it can be found if it is not in your
C<$PATH>.

=cut

my $c1541 = "c1541" ;



=head2 C<XMLProtect($string)>

Process $string and protect characters such that they are safe to use
in an XML file.

=cut

sub XMLProtect ($)
{
  # get string
  
  my ( $string ) = @_ ;

  # replace '<', '&', '"'

  $string =~ s/&/&#038;/g ;
  $string =~ s/</&#139;/g ;
  $string =~ s/%/&#037;/g ;
  $string =~ s/\"/&quot;/g ;

  # return result
  
  return $string ;
}


=head2 C<ProcessD64($d64)>

Process $d64 and output some XML description with its contents. 
$d64 is something that resembles an .d64 image file.

=cut

sub ProcessD64 ($)
{
  # get image
  
  my ( $image ) = @_ ;

  # give some diags to stderr

  print STDERR "Processing $image\n";

  # get type of file

  my $type = `file "$image"` ;

  $type =~ s/[\"\n\r\f\b]//g ;

  # get extension

  my $extension = $image ;

  $extension =~ s/.*\.(.*)/$1/ ;

  $image = XMLProtect $image ;
  $extension = XMLProtect $extension ;
  $type = XMLProtect $type ;

  # output header

  printf
    "  <disk"
      . " file=\"$image\""
	. " extension=\"$extension\""
	  ." type=\"$type\""
	    ." >\n" ;

  $image = quotemeta $image ;

  my $command = "$c1541 -attach $image -@ -dir -@ |" ;

#  print STDERR $command ;

  open (D64,"$c1541 -attach $image -@ -dir -@ |") ;

  if ( <D64> )
    {  
      $_ = <D64> ;

      if ( m/([^,]*),([^,]*),([^,]*),([^,]*)/ )
	{
	  my $number = $1 ;
	  my $message = $2 ;
	  my $track = $3 ;
	  my $sector = $3 ;
	  
	  $number = XMLProtect $number ;
	  $message = XMLProtect $message ;
	  $track = XMLProtect $track ;
	  $sector = XMLProtect $sector ;
	  
	  printf
	    "    <errorchannel"
	      . " number=\"$number\""
		. " message=\"$message\""
		  ." track=\"$track\""
		    ." sector=\"$sector\""
		      . " />\n" ;
	}
      
      if ( m/([^\"]*)\"([^\"]*)\" ([^\n\j]*)/ )
	{
	  my $preamble = $1 ;
	  my $title = $2 ;
	  my $headerextension = $3 ;
	  
	  $preamble = XMLProtect $preamble ;
	  $title = XMLProtect $title ;
	  $headerextension = XMLProtect $headerextension ;
	  
	  printf
	    "    <header"
	      . " preamble=\"$preamble\""
		. " title=\"$title\""
		  ." extension=\"$headerextension\""
		    . " />\n" ;
	}
      
      while ( <D64> )
	{
	  #del,seq,prg,usr,rel
	  
	  if (
	      m{
		([0-9]*)			# blocks
		[^\"]*\"([^\"]*)\" 		# file name
		(\ *([^ ]+).*)?\ * 		# possible comment
		(.[dspur][ers][lqgr].)\ * 	# file type, does not match
						# special entries
	       }x
	     )
	    {
	      # A file entry may contian shifted spaces, which normally
	      # terminates the filename.  After these shifted spaces
	      # you can give some info about the file as a comment.
	      # Some guys like e.g. to put ',8,1:' as a comment.
	      # If your comments contains spaces and then something that can
	      # can match with a filetype, you are in trouble.
	      #
	      # For disk images with invalid directories entries,
	      # you may get weird and faulty results.
	      # Further I must admit that I'm not sure if backspaces
	      # are handled correctly.  From what I remember they give weird
	      # output on the cbm64 screen, 
	      # but you can load the files correctly
	      # if you address them with backspaces in the filename (which is
	      # normally impossible to type, you need some trickery to do so.
	      #
	      
	      my $name = $2 ;
	      my $comment = $4 ;
	      
	      my $type = $5 ;
	      my $blocks = $1 ;
	      
	      if ( $comment )
		{
		  $comment = $3 ;
		}
	      else
		{
		  $comment = "" ;
		}
	      
	      $comment = XMLProtect $comment ;
	      $name = XMLProtect $name ;
	      $type = XMLProtect $type ;
	      $blocks = XMLProtect $blocks ;
	      
	      printf
		"    <file"
		  . " name=\"$name\""
		    . " comment=\"$comment\""
		      . " type=\"$type\""
			. " blocks=\"$blocks\""
			  . " />\n" ;
	    }
	  
	  if ( m/([^,]*),([^,]*),([^,]*),([^,]*)/ )
	    {
	      my $number = $1 ;
	      my $message = $2 ;
	      my $track = $3 ;
	      my $sector = $3 ;
	      
	      $number = XMLProtect $number ;
	      $message = XMLProtect $message ;
	      $track = XMLProtect $track ;
	      $sector = XMLProtect $sector ;
	      
	      printf
		"    <errorchannel"
		  . " number=\"$number\""
		    . " message=\"$message\""
		      ." track=\"$track\""
			." sector=\"$sector\""
			  . " />\n" ;
	    }
	  
	  if ( m/([0-9]*) blocks free/ )
	    {
	      my $blocks = $1 ;
	      
	      $blocks = XMLProtect $blocks ;
	      
	      printf "    <tail freeblocks=\"$blocks\" />\n" ;
	    }
	}
    }
  else
    {
      printf "      <--- c1541 returned no information --->\n" ;
      printf "      <---       normally means c1541 ended prematurely --->\n" ;
      printf "      <---       normally means c1541 ended with SEGV --->\n" ;
    }
  
  # output trailer

  printf "  </disk>\n" ;

  # return result
  
  return 1 ;
}


# output a global header

my $date = `date` ;
my $machine = `uname -a` ;
my $directory = `pwd` ;

chomp $date ;
chomp $machine ;
chomp $directory ;

$date =~ s/%/%%/g ;
$machine =~ s/%/%%/g ;
$directory =~ s/%/%%/g ;

$date =~ s/\"/\\\"/g ;
$machine =~ s/\"/\\\"/g ;
$directory =~ s/\"/\\\"/g ;

printf
  "<d64database"
  . "\n\t date=\"" . $date . "\""
  . "\n\t creator=\"$0\""
  . "\n\t user=\"$ENV{'USER'}\""
  . "\n\t machine=\"" . $machine . "\""
  . "\n\t directory=\"" . $directory . "\""
  . " >\n" ;

# look for all disk files

open (IMAGES,"find 2>/dev/null . -name \"*.[DdGgXx]64\" -o -name \"*.[Dd]71\" -o -name \"*.[Dd]8[012]\" |") ;

# loop over all found images

while ( <IMAGES> )
  {
    chomp ;

    ProcessD64 $_ ;
  }

printf "</d64database>\n" ;

exit $exit_status ;
