package Language::INTERCAL::Object;

# Object file library

# This file is part of CLC-INTERCAL

# Copyright (c) 2006 Claudio Calvelli, all rights reserved.

# CLC-INTERCAL is copyrighted software. However, permission to use, modify,
# and distribute it is granted provided that the conditions set out in the
# licence agreement are met. See files README and COPYING in the distribution.

use strict;
use vars qw($PERVERSION @EXPORT_OK);
$PERVERSION = "CLC-INTERCAL INTERCAL/Object.pm 1.-94.-4";
@EXPORT_OK = qw(find_code forall_code make_code);

use Carp;
use Config;
use POSIX 'strftime';

use Language::INTERCAL::Exporter '1.-94.-4',
	qw(import is_intercal_number compare_version require_version);
use Language::INTERCAL::GenericIO '1.-94.-4',
	qw($stdwrite $stdread $stdsplat $devnull);
use Language::INTERCAL::Optimiser '1.-94.-4';
use Language::INTERCAL::Parser '1.-94.-4';
use Language::INTERCAL::Splats '1.-94.-4', qw(:SP);
use Language::INTERCAL::ByteCode '1.-94.-4',
	qw(BC_STS BC_CRE BC_DES BC_NOT BC_DSX BC_LAB BC_QUA BC_BUG
	   BC bc_skip BCget is_constant);

# oldest objects we can read and understand
use constant MIN_VERSION => '1.-94.-4';

sub new {
    @_ == 1 or croak "Usage: new Language::INTERCAL::Object";
    my ($class) = @_;
    my $s = Language::INTERCAL::SymbolTable->new();
    my @p = (
	Language::INTERCAL::Parser->new($s),
	Language::INTERCAL::Parser->new($s),
    );
    my $o = Language::INTERCAL::Optimiser->new();
    my @now = gmtime(time);
    my @ts = map { strftime($_, @now) } qw(%Y %m %d %H %M %S);
    _new($class, $s, \@p, $o, \@ts);
}

sub _new {
    my ($class, $s, $p, $o, $ts) = @_;
    bless {
	'read_fh' => $stdread,
	'write_fh' => $stdwrite,
	'splat_fh' => $stdsplat,
	'trace_fh' => $stdsplat,
	'rs_fh' => $devnull,
	'optimiser' => $o,
	'thread' => [],
	'flags' => [],
	'code' => ['', {}],
	'source' => '',
	'symbols' => $s,
	'parsers' => $p,
	'bug' => [0, 1],
	'timestamp' => $ts,
    }, $class;
}

sub setbug {
    @_ == 3 or croak "Usage: OBJECT->setbug(TYPE, VALUE)";
    my ($object, $type, $value) = @_;
    $value < 0 || $value > 100 and croak "Invalid BUG value";
    $object->{bug} = [$type ? 1 : 0, $value];
    $object;
}

sub symboltable {
    @_ == 1 or croak "Usage: OBJECT->symboltable";
    my ($object) = @_;
    $object->{symbols};
}

sub num_parsers {
    @_ == 1 or croak "Usage: OBJECT->num_parsers";
    my ($object) = @_;
    scalar @{$object->{parsers}};
}

sub parser {
    @_ == 2 or croak "Usage: OBJECT->parser(NUMBER)";
    my ($object, $number) = @_;
    $number < 1 || $number > @{$object->{parsers}}
	and croak "Invalid NUMBER";
    $object->{parsers}[$number - 1];
}

sub shift_parsers {
    @_ == 1 or croak "Usage: OBJECT->shift_parsers";
    my ($object) = @_;
    shift @{$object->{parsers}};
    my $p = Language::INTERCAL::Parser->new($object->{symbols});
    push @{$object->{parsers}}, $p;
}

sub write {
    @_ == 2 or croak "Usage: write Language::INTERCAL::Object(FILEHANDLE)";
    my ($class, $fh) = @_;
    while (1) {
	my $line = $fh->write_text();
	croak "Invalid Object Format (no __END__)"
	    if $line eq '';
	last if $line =~ /__END__/ || $line =~ /__DATA__/;
    }
    my $line = $fh->write_text();
    $line =~ /^CLC-INTERCAL (\S+) Object File\n$/
	or croak "Invalid Object Format ($line)";
    my $perversion = $1;
    is_intercal_number($perversion)
	or croak "Invalid Object Perversion ($perversion)";
    compare_version($perversion, MIN_VERSION) >= 0
	or croak "Object too old to load with this perversion of sick";
    require_version Language::INTERCAL::Object $perversion;
    my @timestamp = unpack('vCCCCC', $fh->write_binary(7));
    my $fcount = unpack('v', $fh->write_binary(2));
    my @flags = ();
    while (@flags < $fcount) {
	my $flen = unpack('v', $fh->write_binary(2));
	push @flags, $fh->write_binary($flen);
    }
    my $clen = unpack('v', $fh->write_binary(2));
    my $code = $fh->write_binary($clen);
    my $ns = unpack('v', $fh->write_binary(2));
    my %code = ();
    while ($ns-- > 0) {
	my ($sval, $nr) = unpack('vv', $fh->write_binary(4));
	my @r = ();
	while (@r < $nr) {
	    my ($ju, $sl, $ls, $ll, $ds, $dl, $ge, $ab, $qu, $xs, $xl, $rl) =
		unpack('vvvvvvCCCvvv', $fh->write_binary(21));
	    my $ru = $fh->write_binary($rl);
	    my @rb = split(//, unpack('b*', $ru));
	    my @ru = grep { $rb[$_] } (0..$#rb);
	    push @r,
		[$ju, $sl, $ls, $ll, $ds, $dl, $ge, $ab, $qu, $xs, $xl, @ru];
	}
	$code{$sval} = \@r;
    }
    my $slen = unpack('v', $fh->write_binary(2)) || 0;
    my $source = $fh->write_binary($slen);
    my $syms = Language::INTERCAL::SymbolTable->write($fh);
    my $psize = unpack('v', $fh->write_binary(2)) || 0;
    my @p = ();
    while (@p < $psize) {
	push @p, Language::INTERCAL::Parser->write($fh, $syms);
    }
    my $o = Language::INTERCAL::Optimiser->write($fh);
    my $obj = _new($class, $syms, \@p, $o, \@timestamp);
    $obj->{code} = [$code, \%code];
    $obj->{source} = $source;
    $obj->{flags} = \@flags;
    $obj;
}

sub read {
    @_ == 2 or croak "Usage: read Language::INTERCAL::Object(FILEHANDLE)";
    my ($obj, $fh) = @_;
    my ($perversion) = $PERVERSION =~ /(\S+)$/;
    $fh->read_text($Config{startperl} . "\n");
    $fh->read_text("# GENERATED BY CLC-INTERCAL $perversion\n");
    $fh->read_text("# TO MODIFY, EDIT SOURCE AND REPACKAGE\n");
    $fh->read_text("\n");
    $fh->read_text("use FindBin qw(\$Bin);\n");
    $fh->read_text("use File::Spec;\n");
    $fh->read_text("use Language::INTERCAL::GenericIO '$perversion';\n");
    $fh->read_text("use Language::INTERCAL::Interpreter '$perversion';\n");
    $fh->read_text("\n");
    $fh->read_text("my \$p = File::Spec->rel2abs(\$0, \$Bin);\n");
    $fh->read_text("my \$fh = Language::INTERCAL::GenericIO->new" .
		   "('FILE', 'w', \$p);\n");
    $fh->read_text("my \$int = Language::INTERCAL::Interpreter->write" .
		   "(\$fh);\n");
    $fh->read_text("\$int->start()->run()->stop();\n");
    $fh->read_text("\n");
    $fh->read_text("__DATA__\n");
    $fh->read_text("CLC-INTERCAL $perversion Object File\n");
    $fh->read_binary(pack('vCCCCC', @{$obj->{timestamp}}));
    $fh->read_binary(pack('v', scalar @{$obj->{flags}}));
    for my $flag (@{$obj->{flags}}) {
	$fh->read_binary(pack('va*', length $flag, $flag));
    }
    my ($cs, $cp) = @{$obj->{code}};
    my @cp = keys %$cp;
    $fh->read_binary(pack('va*v', length $cs, $cs, scalar @cp));
    for my $s (@cp) {
	my $p = $cp->{$s};
	$fh->read_binary(pack('vv', $s, scalar @$p));
	for my $q (@$p) {
	    my ($ju, $sl, $ls, $ll, $ds, $dl, $ge, $ab, $qu, $xs, $xl, @ru) =
		@$q;
	    my $ru = '';
	    vec($ru, $_, 1) = 1 for @ru;
	    $fh->read_binary(pack('vvvvvvCCCvvv', $ju, $sl, $ls, $ll,
						  $ds, $dl, $ge, $ab,
						  $qu, $xs, $xl, length $ru));
	    $fh->read_binary($ru);
	}
    }
    my $source = defined $obj->{source} ? $obj->{source} : '';
    $fh->read_binary(pack('va*', length $source, $source));
    $obj->{symbols}->read($fh);
    $fh->read_binary(pack('v', scalar @{$obj->{parsers}}));
    for my $p (@{$obj->{parsers}}) {
	$p->read($fh);
    }
    $obj->{optimiser}->read($fh);
    $obj;
}

sub make_code {
    @_ == 1 or croak "Usage: make_code(NEWCODE)";
    my ($newcode) = @_;
    my %obj = (bug => [0, 0]);
    _setcode(\%obj, $newcode);
    wantarray ? @{$obj{code}} : $obj{code}[0];
}

sub setcode {
    @_ == 3 or croak "Usage: OBJECT->set_code(CODE, CPTR)";
    my ($obj, $code, $cptr) = @_;
    $obj->{code} = [$code, $cptr];
    $obj;
}

sub code {
    @_ == 1 || @_ == 2 or croak "Usage: OBJECT->code [(NEWCODE)]";
    my $obj = shift;
    my @oldcode = @{$obj->{code}};
    if (@_) {
	my $newcode = shift;
	_setcode($obj, $newcode);
    }
    wantarray ? @oldcode : $oldcode[0];
}

sub source {
    @_ == 1 || @_ == 2 or croak "Usage: OBJECT->source [(NEWSOURCE)]";
    my $obj = shift;
    if (@_) {
	my $oldsource = $obj->{source};
	$obj->{source} = shift;
	length $obj->{source} > 0xffff
	    and faint(SP_INDIGESTION);
	return $oldsource;
    }
    $obj->{source};
}

sub forall_code {
    @_ == 3 or croak "Usage: forall_code(CPTR, RULES, CODE)";
    my ($cptr, $rules, $co) = @_;
    for my $sptr (sort { $a <=> $b } keys %$cptr) {
	for my $p (@{$cptr->{$sptr}}) {
	    my ($ju, $sl, $ls, $ll, $ds, $dl, $ge,
		$ab, $qu, $xs, $xl, @ru) = @$p;
	    my $res = $co->($xs, $xl, $sptr, $sl, $ab,
			    $ls, $ll, $ds, $dl, $ge, $qu);
	    $p->[7] = $res if defined $res;
	}
    }
}

sub find_code {
    @_ == 3 or croak "Usage: find_code(CPTR, SPTR, RULES)";
    my ($cptr, $sptr, $rules) = @_;
    # if possible, find a valid statement
    if (exists $cptr->{$sptr}) {
	TRY:
	for my $p (@{$cptr->{$sptr}}) {
	    my ($ju, $sl, $ls, $ll, $ds, $dl, $ge,
		$ab, $qu, $xs, $xl, @ru) = @$p;
	    if ($rules) {
		for my $rn (@ru) {
		    next TRY if ! $rules->[$rn];
		    next TRY if ! ${$rules->[$rn]};
		}
	    }
	    # the first one found is the best as we have already sorted the
	    # list in _setcode
	    return ($xs, $xl, $sl, $ab, $ls, $ll, $ds, $dl, $ge, $qu);
	}
    }
    # no valid statement; determine the length of the comment
    my @later = grep { $_ > $sptr } keys %$cptr;
    my $len = undef;
    if (@later) {
	my ($next) = sort { $a <=> $b } @later;
	$len = $next - $sptr;
    }
    return (undef, $sptr, $len, 0);
}

sub _addcode {
    my ($js, $cf) = @_;
    my $fp = index($$js, $cf);
    return $fp if $fp >= 0;
    $fp = length($$js);
    $$js .= $cf;
    return $fp;
}

sub _setcode {
    my ($obj, $code) = @_;
    my %code = ();
    my $joincode = '';
    my $sts = pack('C', BC_STS);
    my @code = @{ref $code ? $code : [$code]};
    if (@code && $obj->{bug}[1] > rand(100)) {
	my $bpos = int(rand scalar @code);
	$code[$bpos] .= pack('C*', BC_BUG, BC($obj->{bug}[0] ? 1 : 0));
    }
    STATEMENT:
    for my $cv (@code) {
	next if $cv eq '';
	my $ep = length $cv;
	unless (substr($cv, 0, 1) eq $sts) {
	    my $bc = sprintf("%02X", ord(substr($cv, 0, 1)));
	    faint(SP_INVALID, $bc, "_setcode");
	}
	my $ncp = 1;
	my $start = BCget($cv, \$ncp, $ep);
	my $len = BCget($cv, \$ncp, $ep);
	my $junk = BCget($cv, \$ncp, $ep);
	my $count = BCget($cv, \$ncp, $ep);
	my @rules = ();
	while (@rules < $count) {
	    push @rules, BCget($cv, \$ncp, $ep);
	}
	@rules = sort { $a <=> $b } @rules;
	my $gerund = 0;
	my $abstain = 0;
	my $quantum = 0;
	my @label = (0, 0);
	my @dsx = (0, 0);
	while ($ncp < $ep) {
	    my $byte = ord(substr($cv, $ncp++, 1));
	    if ($byte == BC_NOT) {
		$abstain = 1;
		next;
	    }
	    if ($byte == BC_QUA) {
		$quantum = 1;
		next;
	    }
	    if ($byte == BC_LAB) {
		$ncp < $ep or faint(SP_INVALID, '(end of statement)', 'LAB');
		if (is_constant(ord(substr($cv, $ncp, 1)))) {
		    $label[0] = BCget($cv, \$ncp, $ep);
		    $label[1] = 0;
		} else {
		    my $diff = bc_skip($cv, $ncp, $ep);
		    $label[0] = _addcode(\$joincode, substr($cv, $ncp, $diff));
		    $label[1] = $diff;
		    $ncp += $diff;
		}
		next;
	    }
	    if ($byte == BC_DSX) {
		$ncp < $ep or faint(SP_INVALID, '(end of statement)', 'DSX');
		if (is_constant(ord(substr($cv, $ncp, 1)))) {
		    $dsx[0] = 1 + BCget($cv, \$ncp, $ep);
		    $dsx[1] = 0;
		} else {
		    my $diff = bc_skip($cv, $ncp, $ep);
		    $dsx[0] = _addcode(\$joincode, substr($cv, $ncp, $diff));
		    $dsx[1] = $diff;
		    $ncp += $diff;
		}
		next;
	    }
	    $gerund = $byte;
	    $ncp--;
	    last;
	}
	my @code = (
	    _addcode(\$joincode, substr($cv, $ncp, $ep - $ncp)),
	    $ep - $ncp,
	);
	# look for the very same thing...
	my @addit = (
	    $junk, $len,
	    $label[0], $label[1],
	    $dsx[0], $dsx[1],
	    $gerund, $abstain, $quantum,
	    $code[0], $code[1],
	    @rules,
	);
	if (exists $code{$start}{$junk}{$len}) {
	    TRY:
	    for my $p (@{$code{$start}{$junk}{$len}}) {
		next TRY if @addit != @$p;
		# the following works because @rules are sorted
		for (my $i = 0; $i < @addit; $i++) {
		    next TRY if $p->[$i] != $addit[$i];
		}
		# yup, it's the very same - no need to add it then
		next STATEMENT;
	    }
	}
	# we'll have to add this one
	push @{$code{$start}{$junk}{$len}}, \@addit;
    }
    length $joincode > 0xffff
	and faint(SP_INDIGESTION);
    # now go and transform each value of %code... note that we sort the
    # array so that noncomments are always before comments, and shorter
    # comments are preferred over longer; however within the same comment
    # length (or within the noncomment group) we prefer longer source
    # code; all else being equal, we prefer things which use more grammar
    # rules
    for my $sp (keys %code) {
	my @elems = ();
	for my $j (sort { $a <=> $b } keys %{$code{$sp}}) {
	    for my $l (sort { $b <=> $a } keys %{$code{$sp}{$j}}) {
		push @elems, sort {
		    scalar @$a <=> scalar @$b
		} @{$code{$sp}{$j}{$l}};
	    }
	}
	$code{$sp} = \@elems;
    }
    $obj->{code} = [$joincode, \%code];
}

1;
