#!/usr/local/bin/perl
#
#	A little database driven misassebler script to add headers, labels
#	and comments in disassembled 6502 ML programs. Easy to modify.
#	Most useful in documenting the operating system with on-line
#	memory maps.
#	This program is based on the idea of "monstar", a 8502 ML
#	disassembler with printer support.
#
#	C-compiler (cpp) is used to nicely handle includes and conditionals.
#	It's used if the Documentfile name is supplied with extension ``.c''.
#
    $Version= "4.03b	 7 Jun 1996";
#
#   Written by
#	jopi@zombie.oulu.fi
#
#
#	Version: 1.0   29 Apr 1994  14:49
#		Uses disassembly output from x64. Addresses in string format.
#	Version: 2.0   17 May 1994  19:23
#		Separated comment reading and data processing.
#		Produces Symbol Table for second run.
#		Warns about masked commands.
#	Version: 3.0   21 May 1994
#		Disassembles now directly from binary file.
#		'Print Text Immediate' is detected, and TEXT mode selected.
#		Added HEX to DEC conversion, addresses now internally in DEC.
#	Version: 3.1   16 July 1994
#		Added bit for undocumented opcodes. Selects autom. DATA mode.
#		Repositions when entry point is masked out.
#		Cleaned output format a bit. Now format v1.2.
#	Version: 3.2    4 Aug 1994
#		Inhibit 'BIT' from setting DATA label.
#		HTML mode and commandline arg parser.
#	Version: 3.3   10 Nov 1994
#		Added some variations in input and output formatting.
#		Made R65CE02 version too.
#	Version: 4.0   15 Feb 1995
#		Split disassembler into subroutines.
#		Prints blank lines after jump instructions.
#		Added WORD format. Now format v1.3.
#	Version: 4.1   08 Jun 1995
#		Added calling GCC for include files and conditional map blocks
#		Finds descriptions for references linked via JMP commands.
#		Skip default load address in the binary.
#	Version: 4.2   26 Jun 1995
#		Introduced FLPT data type.
#		Bug fix: JAM is a forbidden instruction.
#	Version: 4.3   16 Nov 1995
#		Divided disassembler further.
#		Converts screen codes to printable characters.
#		Options to select CPU on command line.
#		Separated SymbolType and SymbolName arrays.
#		Limited array sizes and made more active search for code.
#	Version: 4.3b    7 Jun 1996
#		Identify source on symbol table file.
#
#
#   Restrictions:
#	no more than 1 label per address supported yet (too lazy to split them)
#	addresses are always supposed to be in HEX
#	input format is fixed, albeit configurable:
#		doc:	addr	description	TYPE
#		dis:	. ADDR	MNEM operand
#	comments are preceded by ';'
#
#	All label declarations should be in the beginning of the document file
#	Incorrect header after "print text immediate" turns off textmode.
#


#
# Options
#

$COMPILER = "gcc -E";			### Default C Compiler Preprocessor

$EXESYMBOL = "i";
$DATSYMBOL = "s";

$LT_EXE = 1;
$LT_BRA = 2;
$LT_DAT = 4;
$LT_ARY = 8;

$LIMIT_IMPL_ARY = 256;

$BYTESPERLINE = 8;
$WORDSPERLINE = 8;
$MIN_SUGG_CE  = 4;		### Min # of Calls to suggest new code header
$CPUMODEL = 1;


opt: while ($_ = $ARGV[0], /^-/) {
	shift;
	if (/^-a/)	{ $addr = hex($ARGV[0]); shift; next opt; }
	if (/^-cpp/)	{ $COMPILER = $ARGV[0]; shift; next opt; }
	if (/^-m/)	{ $HDRFILE = $ARGV[0]; shift; next opt; }
	if (/^-o/)	{ $OUTFILE = $ARGV[0]; shift; next opt; }
	if (/^-s/)	{ $SYMFILE = $ARGV[0]; shift; next opt; }
	if (/^-d/)	{ $dontref = hex($ARGV[0]); shift;
			  print "skip: $dontref"; next opt; }

	if (/^-g/)	{ $debug++; next opt; }
	if (/^-html/)	{ $HTML++; next opt; }
	if (/^-i/)	{ $inherit++; next opt; }
	if (/^-p/)	{ $pass = 2; next opt; }
	if (/^-q/)	{ $verbose = 0; next opt; }
	if (/^-v/)	{ $verbose++; next opt; }
	if (/^-w/)	{ $vectors++; next opt; }
	if (/^-6502/)	{ $CPUMODEL = 1; next opt; }
	if (/^-65ce02/)	{ $CPUMODEL = 0; next opt; }
	if (/^-/)	{ print STDERR "recomment: Unknown option '$_'\n\n";
			  exit 1; }
    }

# addr is kept intact all processing time


$HDRFILE=$ARGV[0];
$PRGFILE=$ARGV[1];
$OUTFILE=$ARGV[2];

if ($ARGV[3]) {
    print STDERR "recomment: Too many arguments.\n\n";
    exit 1;
}

$USER = $ENV{USER} || die "You don't exist. Go away!\n";
$HOST = `uname -n` || die "No host. Where are you?\n";
chop $HOST;
$prgname = `basename $0`;
chop $prgname;

$Usage="\n\n   Usage: `basename $0` [-sym sym_outfile] [-html] [-discard address]\n\t [-addr start_address] headerfile programfile [outfile]\n\n";


#
#  Constants and Internal Variables
#

$, = ' ';
$\ = "\n";


# Mode constants
$NONE = 0;
$REM  = 1;
$CODE = 2;
$TEXT = 3;
$DATA = 4;
$WORD = 5;

$ILLEG = 64;			# flag: illegal or undocumented instruction


# -----------------------------------------------------------------------------

if ($CPUMODEL) {

    $ZERO_PAGE = 2;
    $RELATIVE = 11;
    $RELATIVE_LONG = 17;
    $ZERO_RELATIVE = 18;

#  Too cryptic ?  Nah, just compiled (gcc -E) from table.c ...
@modes = (
 64,  9, 64, 73, 66,  2,  2, 66,  0,  1, 12, 65, 69,  5,  5, 69,
 11, 10, 64, 74, 67,  3,  3, 67,  0,  7, 64, 71, 70,  6,  6, 70,
  5,  9, 64, 73,  2,  2,  2, 66,  0,  1, 12, 65,  5,  5,  5, 69,
 11, 10, 64, 74, 67,  3,  3, 67,  0,  7, 64, 71, 70,  6,  6, 70,
  0,  9, 64, 73, 66,  2,  2, 66,  0,  1, 12, 65,  5,  5,  5, 69,
 11, 10, 64, 74, 67,  3,  3, 67,  0,  7, 64, 71, 70,  6,  6, 70,
  0,  9, 64, 73, 66,  2,  2, 66,  0,  1, 12, 65,  8,  5,  5, 69,
 11, 10, 64, 74, 67,  3,  3, 67,  0,  7, 64, 71, 70,  6,  6, 70,
 65,  9, 65, 73,  2,  2,  2, 66,  0, 65,  0, 65,  5,  5,  5, 69,
 11, 10, 64, 74,  3,  3,  4, 68,  0,  7,  0, 71, 70,  6, 71, 71,
  1,  9,  1, 74,  2,  2,  2, 66,  0,  1,  0, 65,  5,  5,  5, 69,
 11, 10, 64, 74,  3,  3,  4, 68,  0,  7,  0, 71,  6,  6,  7, 71,
  1,  9, 65, 73,  2,  2,  2, 66,  0,  1,  0, 65,  5,  5,  5, 69,
 11, 10, 64, 74, 67,  3,  3, 67,  0,  7, 64, 71, 70,  6,  6, 70,
  1,  9, 65, 73,  2,  2,  2, 66,  0,  1,  0, 65,  5,  5,  5, 69,
 11, 10, 64, 74, 67,  3,  3, 67,  0,  7, 64, 71, 70,  6,  6, 70,
);

@tk = (
  "BRK",  "ORA",  "JAM",  "SLO",  "NOOP", "ORA",  "ASL",  "SLO",
  "PHP",  "ORA",  "ASL",  "ANC",  "NOOP", "ORA",  "ASL",  "SLO",
  "BPL",  "ORA",  "JAM",  "SLO",  "NOOP", "ORA",  "ASL",  "SLO",
  "CLC",  "ORA",  "NOOP", "SLO",  "NOOP", "ORA",  "ASL",  "SLO",

  "JSR",  "AND",  "JAM",  "RLA",  "BIT",  "AND",  "ROL",  "RLA",
  "PLP",  "AND",  "ROL",  "ANC",  "BIT",  "AND",  "ROL",  "RLA",
  "BMI",  "AND",  "JAM",  "RLA",  "NOOP", "AND",  "ROL",  "RLA",
  "SEC",  "AND",  "NOOP", "RLA",  "NOOP", "AND",  "ROL",  "RLA",

  "RTI",  "EOR",  "JAM",  "SRE",  "NOOP", "EOR",  "LSR",  "SRE",
  "PHA",  "EOR",  "LSR",  "ASR",  "JMP",  "EOR",  "LSR",  "SRE",
  "BVC",  "EOR",  "JAM",  "SRE",  "NOOP", "EOR",  "LSR",  "SRE",
  "CLI",  "EOR",  "NOOP", "SRE",  "NOOP", "EOR",  "LSR",  "SRE",

  "RTS",  "ADC",  "JAM",  "RRA",  "NOOP", "ADC",  "ROR",  "RRA",
  "PLA",  "ADC",  "ROR",  "ARR",  "JMP",  "ADC",  "ROR",  "RRA",
  "BVS",  "ADC",  "JAM",  "RRA",  "NOOP", "ADC",  "ROR",  "RRA",
  "SEI",  "ADC",  "NOOP", "RRA",  "NOOP", "ADC",  "ROR",  "RRA",

  "NOOP", "STA",  "NOOP", "SAX",  "STY",  "STA",  "STX",  "SAX",
  "DEY",  "NOOP", "TXA",  "ANE",  "STY",  "STA",  "STX",  "SAX",
  "BCC",  "STA",  "JAM",  "SHA",  "STY",  "STA",  "STX",  "SAX",
  "TYA",  "STA",  "TXS",  "SHS",  "SHY",  "STA",  "SHX",  "SHA",

  "LDY",  "LDA",  "LDX",  "LAX",  "LDY",  "LDA",  "LDX",  "LAX",
  "TAY",  "LDA",  "TAX",  "LXA",  "LDY",  "LDA",  "LDX",  "LAX",
  "BCS",  "LDA",  "JAM",  "LAX",  "LDY",  "LDA",  "LDX",  "LAX",
  "CLV",  "LDA",  "TSX",  "LAS",  "LDY",  "LDA",  "LDX",  "LAX",

  "CPY",  "CMP",  "NOOP", "DCP",  "CPY",  "CMP",  "DEC",  "DCP",
  "INY",  "CMP",  "DEX",  "SBX",  "CPY",  "CMP",  "DEC",  "DCP",
  "BNE",  "CMP",  "JAM",  "DCP",  "NOOP", "CMP",  "DEC",  "DCP",
  "CLD",  "CMP",  "NOOP", "DCP",  "NOOP", "CMP",  "DEC",  "DCP",

  "CPX",  "SBC",  "NOOP", "ISB",  "CPX",  "SBC",  "INC",  "ISB",
  "INX",  "SBC",  "NOP",  "USBC", "CPX",  "SBC",  "INC",  "ISB",
  "BEQ",  "SBC",  "JAM",  "ISB",  "NOOP", "SBC",  "INC",  "ISB",
  "SED",  "SBC",  "NOOP", "ISB",  "NOOP", "SBC",  "INC",  "ISB"
);

@cl = (1, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 1, 0);
@pe = ( "", "#", "",  "",   "",   "", "",   "",   "(", "(",  "(",   "",  "", "");
@pj = ( "", "",  "",  ",X", ",Y", "", ",X", ",Y", ")", ",X)", "),Y", "", "", "");

@jump = (
 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
);

# -----------------------------------------------------------------------------
} else {
# -----------------------------------------------------------------------------

# 65 CE 02

    $ZERO_PAGE = 4;
    $RELATIVE = 16;
    $RELATIVE_LONG = 17;
    $ZERO_RELATIVE = 18;

@modes = (
 64,  8,  0,  0,  4,  4,  4,  4,  0,  2,  1,  0, 11, 11, 11, 18,
 16,  9, 10, 17,  4,  5,  5,  4,  0, 13,  1,  0, 11, 12, 12, 18,
 11,  8,  7,  8,  4,  4,  4,  4,  0,  2,  1,  0, 11, 11, 11, 18,
 16,  9, 10, 17,  5,  5,  5,  4,  0, 13,  1,  0, 12, 12, 12, 18,
  0,  8,  1,  1,  4,  4,  4,  4,  0,  2,  1,  0, 11, 11, 11, 18,
 16,  9, 10, 17,  5,  5,  5,  4,  0, 13,  0,  0,  0, 12, 12, 18,
  0,  8,  2, 11,  4,  4,  4,  4,  0,  2,  1,  0, 14, 11, 11, 18,
 16,  9, 10, 17,  5,  5,  5,  4,  0, 13,  0,  0, 15, 12, 12, 18,

 16,  8, 19, 17,  4,  4,  4,  4,  0,  2,  0, 12, 11, 11, 11, 18,
 16,  9, 10, 17,  5,  5,  6,  4,  0, 13,  0, 13, 11, 12, 12, 18,
  2,  8,  2,  4,  4,  4,  4,  4,  0,  2,  0, 11, 11, 11, 11, 18,
 16,  9, 10, 17,  5,  5,  6,  4,  0, 13,  0, 12, 12, 12, 13, 18,
  2,  8,  2,  4,  4,  4,  4,  4,  0,  2,  0, 11, 11, 11, 11, 18,
 16,  9, 10, 17,  4,  5,  5,  4,  0, 13,  0,  0, 11, 12, 12, 18,
  2,  8, 19,  4,  4,  4,  4,  4,  0,  2,  0, 11, 11, 11, 11, 18,
 16,  9, 10, 17,  3,  5,  5,  4,  0, 13,  0,  0, 11, 12, 12, 18
);

@tk = (
 "BRK", "ORA", "CLE", "SEE", "TSB", "ORA", "ASL", "RMB0",
 "PHP", "ORA", "ASL", "TSY", "TSB", "ORA", "ASL", "BBR0",
 "BPL", "ORA", "ORA", "BPL", "TRB", "ORA", "ASL", "RMB1",
 "CLC", "ORA", "INC", "INZ", "TRB", "ORA", "ASL", "BBR1",

 "JSR", "AND", "JSR", "JSR", "BIT", "AND", "ROL", "RMB2",
 "PLP", "AND", "ROL", "TYS", "BIT", "AND", "ROL", "BBR2",
 "BMI", "AND", "AND", "BMI", "BIT", "AND", "ROL", "RMB3",
 "SEC", "AND", "DEC", "DEZ", "BIT", "AND", "ROL", "BBR3",

 "RTI", "EOR", "NEG", "ASR", "ASR", "EOR", "LSR", "RMB4",
 "PHA", "EOR", "LSR", "TAZ", "JMP", "EOR", "LSR", "BBR4",
 "BVC", "EOR", "EOR", "BVC", "ASR", "EOR", "LSR", "RMB5",
 "CLI", "EOR", "PHY", "TAB", "MAP", "EOR", "LSR", "BBR5",

 "RTS", "ADC", "RTS", "BSR", "STZ", "ADC", "ROR", "RMB6",
 "PLA", "ADC", "ROR", "TZA", "JMP", "ADC", "ROR", "BBR6",
 "BVS", "ADC", "ADC", "BPL", "STZ", "ADC", "ROR", "RMB7",
 "SEI", "ADC", "PLY", "TBA", "JMP", "ADC", "ROR", "BBR7",

 "BRA", "STA", "STA", "BRA", "STY", "STA", "STX", "SMB0",
 "DEY", "BIT", "TXA", "STY", "STY", "STA", "STX", "BBS0",
 "BCC", "STA", "STA", "BCC", "STY", "STA", "STX", "SMB1",
 "TYA", "STA", "TXS", "STX", "STZ", "STA", "STZ", "BBS1",

 "LDY", "LDA", "LDX", "LDZ", "LDY", "LDA", "LDX", "SMB2",
 "TAY", "LDA", "TAX", "LDZ", "LDY", "LDA", "LDX", "BBS2",
 "BCS", "LDA", "LDA", "BCS", "LDY", "LDA", "LDX", "SMB3",
 "CLV", "LDA", "TSX", "LDZ", "LDY", "LDA", "LDX", "BBS3",

 "CPY", "CMP", "CPZ", "DEW", "CPY", "CMP", "DEC", "SMB4",
 "INY", "CMP", "DEX", "ASW", "CPY", "CMP", "DEC", "BBS4",
 "BNE", "CMP", "CMP", "BNE", "CPZ", "CMP", "DEC", "SMB5",
 "CLD", "CMP", "PHX", "PHZ", "CPZ", "CMP", "DEC", "BBS5",

 "CPX", "SBC", "LDA", "INW", "CPX", "SBC", "INC", "SMB6",
 "INX", "SBC", "NOP", "ROW", "CPX", "SBC", "INC", "BBS6",
 "BEQ", "SBC", "SBC", "BPL", "PHW", "SBC", "INC", "SMB7",
 "SED", "SBC", "PLX", "PLZ", "PHW", "SBC", "INC", "BBS7"
);

@jump = (
 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1,

 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
);

@cl = (
1, 1, 2, 3,  2, 2, 2,  2, 2, 2, 2,  3, 3, 3, 3, 3,  2, 3, 3, 2, 0
);

@pe = (
 "", "A", "#", "#", "", "", "",
 "(", "(", "(", "(", "", "", "",
 "(", "(", "", "", "", "(", ""
);

@pj = (
 "", "", "", "", "", ",X", ",Y",
 ")", ",X)", "),Y", "),Z", "", ",X", ",Y",
 ")", ",X)", "", "", "", ",SP),Y", ""
);

# -----------------------------------------------------------------------------
} # 65 CE 02
# -----------------------------------------------------------------------------


$sn = 0;
$i = 0;
$n = 0;
$flg = 0;
$ca = 0000;
$pca = "";


# Identify

print STDERR "\nReComment $Version\n";


#
# Process
#

if (open (OUT, ">$OUTFILE")) {
    print STDERR "\n Writing to $OUTFILE";
    select(OUT);
}

do print_headers();

# Read the headers, symbols, and comments from document file

do read_comments();

# if (!$flg) {
#	if (!$n) { printf STDERR "Cannot locate comments/headers.\n"; exit;}
#	for (i=0; i<n; i++) print comment[i]; i=0;
#	flg++;
# }


if (!$addr) {				### 65xx programs CANNOT start at 0000
    die "\nNo start address specified";
}


#
# Read the program disassembly
#


open(prg,"$PRGFILE") || die "\nCan't open program file $PRGFILE$Usage";

printf STDERR "\nProcessing $PRGFILE\n\n";

#
# Check for Load Address
#
read (prg, $_, 1); $op = ord($_);
read (prg, $_, 1); $op |= (ord($_) <<8);

if ($addr == $op) {
    printf STDERR "Skipped load address %04x\n", $op;
}
else {
    seek prg, -2, 1;
}


do reassemble();



#
#  Summary
#

if ($SYMFILE) {
    open(SYM,">$SYMFILE");

    printf (SYM "! Symbol Table for '$PRGFILE'   %s\n\n", `/bin/date`);

    printf STDERR "\nWriting Comment Table\n";
    printf SYM "\n\n! Comment Table\n\n";
    foreach $key (sort {$a <=> $b} (keys %subrtitle)) {
	printf SYM "_%04x\t%s\n", $key, $subrtitle{$key};
    }

    printf STDERR "Writing Address Table\n";
    printf SYM "\n\n! Suggest adding the following addresses:\n\n";
    foreach $key (sort {$a <=> $b} (keys %calls)) {
	($calls{$key} >= $MIN_SUGG_CE && !$title{$key}) &&
	    printf SYM "!\t%04x\t%d\n", $key, $calls{$key};
    }

    printf STDERR "Writing Symbol Table\n\n";
    printf SYM "\n\n";
    foreach $key (sort {$a <=> $b} (keys %SymbolType)) {

	if (! $SymbolName{$key}) {			### No name, create one
	    if ($SymbolType{$key} & $LT_EXE) {
		$SymbolName{$key} = "@";
	    }
	    else {
		$SymbolName{$key} = "&";
	    }
	}

	printf SYM "%s=%04x\n", $SymbolName{$key}, $key;
    }
    close(SYM);
}

do print_footers();
close(OUT);

printf ("\n");
exit 0;


# ----------------------------------------------------------------------------

sub reassemble {

$ca = $pca = $addr;
$n1=0;
$outmode = $hdrmode = $NONE;			### No headers given


# Do leading comments

while ($n1 < $nr && $remadr{$n1} < $addr) {
    print "$remark{$n1++}";			### program description
    ++$flg;
}


#
# Print straight from the binary
#

$padr = $addr;
$bytes = 0;

 code: while (read (prg, $_, 1)) {
    $prev = $op;
    $op = ord($_);


# ----------------------------------------------------------------------------

#
# Analyze Input
#

# Insert headers and function-wise comments

    if ($title{$padr}) {
	if (grep(/WORD|FLPT|DATA|CHIP|EMPTY/, $title{$padr})) {

	    if (grep(/WORD/, $title{$padr})) {
		$hdrmode = $WORD;
	    } else {
		$hdrmode = $DATA;
	    }

	    $branchflg = 0;

	    (!$wasblank) && printf "\n";
	    print "\n; $title{$padr}\n";	### routine name

	    if ($SymbolType{$padr} == $LT_EXE) {
		$SymbolType{$padr} = 0;
		$SymbolName{$padr} = "";
		printf STDERR "%04x: Ignored CALL reference.\n", $padr;
	    }
	}
	elsif (grep(/TEXT/, $title{$padr})) {
	    $hdrmode = $TEXT;
	}
	else {
	    $hdrmode = $CODE;
	    $branchflg = 0;

	    (!$wasblank) && printf "\n";
	    print "\n; $title{$padr}\n";	### routine name

	    if ($SymbolType{$padr} == $LT_DAT || $SymbolType{$padr} == ($LT_DAT | $LT_ARY)) {
		$SymbolType{$padr} = 0;
		$SymbolName{$padr} = "";
		printf STDERR "%04x: Ignored DATA reference.\n", $padr;
	    }
	}

	$outmode = $hdrmode;
    } # if title


    $flg = 0;
    while ($n1 < $nr && $remadr{$n1} <= $padr) {
	print "$remark{$n1++}";			### routine description
	++$flg;
    }
    if ($flg) {
	printf "\n";
	++$wasblank;
    }


#
# Switch between asm/hex modes
#

    if ($SymbolType{$padr} == $LT_EXE) {
	if ($outmode > $CODE) {
	    printf "\n";
	    ++$wasblank;
	}
	$outmode = $CODE;
    }
    elsif ($SymbolType{$padr} & $LT_DAT) {
	if ($outmode <= $CODE && !($jump[$prev])) {
	    $verbose && printf STDERR "%04X: CODE TO DATA attempted.\n", $padr;
	}
	else {
	    if ($outmode != $WORD) {
		$outmode = $DATA;

		if ($SymbolType{$padr} & $LT_ARY) {
		    $left = $LIMIT_IMPL_ARY;	### It all cannot be ONE array.
		}

	    }
	}
    }


    if ($modes[$op] & $ILLEG) {
	if ($SymbolType{$padr} == $LT_EXE) {
	    printf STDERR "Invalid reference %04X ignored.\n", $padr;
	}
	elsif ($outmode <= $CODE) {
	    printf STDERR "%04X: Illegal instruction.\n", $padr;
	}

	if ($outmode <= $CODE) {
	    $outmode = $DATA;
	    printf "\n";
	}
    } # illeg


# ----------------------------------------------------------------------------

if ($outmode == $WORD) {

#
# WORD FORMAT
#

    if (!$bytes++) {				### Addres
	do print_address($padr);

	printf ("\t.word");
    } # if bytes

    $op2 = ord(getc(prg));
    printf(" %02X%02X", $op2, $op);		### Word LO/HI


    if ($SymbolType{$padr +1} && !$SymbolType{$padr}) { # test for misplaced label
	printf STDERR "%04X: ADDRESS DIFFER in WORD format.\n", $padr;

	printf ("\n; *** %04X: ADDRESS DIFFER. This may indicate misassembly ***\n", $padr);
    }


    $padr += 2;

    if ($bytes >= $WORDSPERLINE || $title{$padr} || $SymbolType{$padr} ) {
	printf "\n";				### Newline
	$bytes = 0;
    }

    if ($title{$padr+1}) {			### test misalignment
	seek(prg, -1, 1);
	--$padr;
	printf "\n; *** Resyncing ***";
    }

   next code;

# ----------------------------------------------------------------------------
} elsif ($outmode > $CODE) {
# ----------------------------------------------------------------------------


# Test for code

    if ($left) {				### Max bytes per array
	if (! --$left) { $hdrmode = $NONE; }
    }

    if ($hdrmode == $NONE && grep (/LDA|LDX|LDY|JMP|BRA/, $tk[$op])) {
# should test a few commands ...

	$outmode = $NONE;
	goto disasm;
    }


#
# BYTE FORMAT
#

    if (!$bytes++) {				### Addres
	do print_address($padr);

	printf ("\t.byte");
	$ps = "";
    } # if bytes

    printf(" %02X", $op);			### Bytes
    $ps .= $_;
    ++$padr;


    if ( ($outmode == $TEXT && !$op) ||
	$bytes >= $BYTESPERLINE || $title{$padr} || $SymbolType{$padr} ) {
	for ($i = $bytes; $i < $BYTESPERLINE ; $i++) { printf "   "; }

	$ps =~ y/\001-\037A-Za-z\240-\277\301-\332/a-z\[\\\]^_a-zA-Z\040-\077A-Z/;	# to ascii
	$ps =~ y/\040-\176/\./c;		### Filter out special chars
	print "  ;$ps";				### Text and Newline
	$bytes = 0;

	if ($outmode == $TEXT && !$op) {
	    $outmode = $CODE;
	    printf "\n";
	    ++$wasblank;
	}
    }

   next code;

# ----------------------------------------------------------------------------

} else {		# outmode

# ----------------------------------------------------------------------------

disasm:

# The main part of this disassembler routine is grabbed directly from
# "monstar v1.85", a 8502 disassembler written in Basic 7.0 and 2.0.
#
    $ps = "";
 
    $am = $modes[$op] & 31;			### mode and type
    $bytes = $cl[$am];
 
    if ($bytes == 2) {
	$oper = ord(getc(prg));
	if ($am == $RELATIVE) {			### Relative
	    do calc_disp($oper);

	    $oper += 2 + $padr;
	    $ps = sprintf( " \$%04X", $oper);
	}
	else {
	    $ps = sprintf( " $pe[$am]\$%02X$pj[$am]", $oper);
	}
    }
    elsif ($bytes == 3) {
	if ($am == $ZERO_RELATIVE) {		### Relative Depending On Byte
	    $cc = ord(getc(prg));
	    $oper = ord(getc(prg));

	    do calc_disp($oper);

	    $oper += 3 + $padr;
	    $ps = sprintf(" \$%02X,\$%04X", $cc, $oper);
	}
	else {
	    $oper = ord(getc(prg));
	    $oper |= (ord(getc(prg)) << 8);

	    if ($am == $RELATIVE_LONG) {	### Relative Long
		$oper = ($oper + 2 + $padr) & 0xffff;
	    }
	    $ps = sprintf(" $pe[$am]\$%04X$pj[$am]", $oper);
	}
    }

#printf ("DEBUG: (%d) %04x  %02x\t%3s$ps\n",
#	$bytes, $padr, ord($_),  $tk[$op], $oper); 

#   ($oper = $ps) =~ y/a-z\(\)\$\ /A-Z/d;	### operand field



# -----------------------------------------------------------------------------

#
# Find new references
#
    do makerefer($oper, $ps);

} # outmode


#
# FORMAT OUTPUT
# 
# Finally, format the output
# and add symbols where used
#

	do print_address($padr);
	do print_instruction();


# ----------------------------------------------------------------------------

#
# Some reasoning ...
#
# This part checks that given memory map entries do exist, and
# separates consecutive ML routines with a blank line.
#

	if ($bytes > 1) {
	    if (($title{$padr+1} || $SymbolType{$padr+1}) ||
		(($bytes > 2) && ($title{$padr+2} || $SymbolType{$padr+2})) ) {

		if ($title{$padr+1}) {
		    $off = 1;

#		    if (($modes[$op] & $ILLEG) && $SymbolType{$padr} == $LT_EXE) {
#			printf STDERR "Invalid title %04X ignored.\n", $padr;
#		    }
#		    else {
			seek(prg, 1-$bytes, 1);
			printf "\n; *** Resyncing ***";
			$bytes = 1;
#		    }

		}
		elsif ($title{$padr+2}) {
		    $off = 2;
		    seek(prg, 2-$bytes, 1);
		    printf "\n; *** Resyncing ***";
		    $bytes = $off;
		}
		else {
		    if (($SymbolType{$padr+1} == $LT_EXE) ||
			(($bytes > 2) && $SymbolType{$padr+2} == $LT_EXE) ) {
			printf ("; *** %04X: CALL ADDRESS ALIGNMENT. This may indicate misassembly ***\n", $padr);
		    }
		    else {
			printf ("; *** %04X: Warning: Self modifying operand. ***\n", $padr);
		    }

		} # else
	    } # differ
	}
	$padr += $bytes;
	$bytes = 0;

	if ($branchflg < $padr &&		### End of routine
	    ($tk[$op] eq "JMP" || $tk[$op] eq "BRA" ||
	     $tk[$op] eq "RTS" || $tk[$op] eq "RTI")) {
	    printf "\n";
	    ++$wasblank;
	}


# ----------------------------------------------------------------------------

    } # while code

# Did we encounter EOF in the middle of a ML routine ?

if ($outmode <= $CODE && !($jump[$prev])) {
    $verbose && printf STDERR "%04X: Premature end of input file.\n", $padr;
}

} # sub reassemble


# ----------------------------------------------------------------------------

#
# Find new references
#
# From disassembly: $oper holds the operand address once found
# from binary: $oper is the operand.

sub makerefer {

    $flg = 0;

    if ($am == $RELATIVE || $am == $RELATIVE_LONG || $am == $ZERO_RELATIVE ||
      $tk[$op] eq "JMP" || $tk[$op] eq "JSR") {	### test mnemonic field

	$flg++;

	if (!$SymbolType{$oper}) {
	    $SymbolType{$oper} = $LT_EXE;		### store symbol table
	    ($debug) && printf STDERR "%04X: autodefine label: %04X\n", $padr, $oper;
	} # symbol
	elsif ($SymbolType{$oper} & $LT_DAT) {
	    printf STDERR "Reference mismatch for %04X.\n", $oper;
	}


	### test mnemonic and operand fields

	if ($am == $RELATIVE || $am == $ZERO_RELATIVE) {
	    if ($oper > $branchflg) {
		$branchflg = $oper;		### prune logic tree
	    }
	}

	elsif ($tk[$op] eq "JMP") {		### copy indirect header
	    if ($SymbolName{$padr} && !$title{$padr} && $title{$oper}) {
		$subrtitle{$padr} = $title{$oper};
	    }
	}

	# For C128 and C65: Print Immediate

	elsif ($tk[$op] eq "JSR") {	### test mnemonic and operand fields
	    if ($oper == hex(FF7D) ||

		($CPUMODEL && (
			$oper == hex(9281) || $oper == hex(FA17) ||	# C128
			$oper == hex("8DFD") || $oper == hex("C01B") ||	# C64 ARC
			$oper == hex("FF4F")				# Plus4
				) ) ||
		(!$CPUMODEL && $oper == hex(FA37))			# C65
	    ) {
		printf STDERR "%04X: TEXT immediate\n", $padr;
		$outmode = $TEXT;
		(!$wasblank) && printf "\n";	### Blank before JSR _PRIMM
	    }
	    ++$calls{$oper};			### Find new subroutines
	}
    } # JMP, JSR

    elsif ($bytes > 1 && $tk[$op] ne "BIT" &&	### Filter out 'BIT'
	   (substr($ps,1,1) eq "\$" || substr($ps,1,2) eq "(\$") ) {
						### Zeropage and Absolute modes
	if (!$SymbolType{$oper}) {
	    $SymbolType{$oper} = $LT_DAT;		### store symbol table

	    ($debug) &&
		printf STDERR "%04X: autodefine label: %04X\n", $padr, $oper;
	}
	elsif (! ($SymbolType{$oper} & $LT_DAT)) {
	    printf STDERR "Reference mismatch for %04X.\n", $oper;
	}

	if (grep (/X|Y|Z|SP/, $ps)) {
	    $SymbolType{$oper} |= $LT_ARY;		### array flag
	}

	$flg++;
    } # BIT
}


# -----------------------------------------------------------------------------

sub calc_disp {
    if ($oper == 255) {
	$outmode = $DATA;
	return;
    }

    if ($oper > 127) {
	if ($oper == 254) {			### offset == 0
	    printf STDERR "%04x: Endless loop.\n", $padr;
	}

	$oper -= 256;
    }
}


sub print_address {

# DEBUG / Memory dump: print all addresses

#   printf(" %4s\t", $padr);
    printf(" %04X\t", $padr);

    $wasblank = 0;

    ### create assembler source code

    if ($SymbolType{$padr}) {			### test for label

	if ($HTML) {				### HTML Anchor
	    printf ("<A NAME=\"%04X\">", $padr);
	}


	if ($SymbolName{$padr}) {
	    printf ($SymbolName{$padr});
	}
	else {
	    if ($SymbolType{$padr} & $LT_EXE) {
		printf ("%s%04X", $EXESYMBOL, $padr);
	    }
	    else {
		printf ("%s%04X", $DATSYMBOL, $padr);
	    }
	}

	if ($HTML) {				### HTML Anchor
	    printf ("</A>");
	}
    }
}


# -----------------------------------------------------------------------------

#
# Print Instruction
#

sub  print_instruction {

    ### Refer to HTML Anchor if $oper points to ROM

    if ($HTML && $flg && $SymbolType{$oper} &&
	($oper >= $addr) &&  (grep( !/CHIP/, $title{$oper})) &&
	(!$dontref || $oper < $dontref || $oper > ($dontref + 0xfff))) {

	printf ("\t%s<A HREF=\"#%04X\">%s</A>", $tk[$op], $oper, $ps);
    }
    else {
	printf ("\t%s%s", $tk[$op], $ps);	### adjust this to suit
    }

    if ($flg && $title{$oper}) {		### add line-wise comments

	do adjust_oper();		### 3 or less characters in operand
	@tmp = split(/\\/, $title{$oper});
	printf ("\t; %s\n", $tmp[0]);	### print the first header
    }
    elsif ($flg && $subrtitle{$oper}) {
	do adjust_oper();
	printf ("\t; Do %s\n", $subrtitle{$oper});	### print the subheader
    }
    else {
	printf ("\n");
    }
}


sub adjust_oper {	### 3 or less characters in operand
    if ( (($am & 31) <= 1 || ($am & 31) == $ZERO_PAGE) &&
	length($tk[$op]) < 4 ) {
	printf ("\t");
    }
}


# -----------------------------------------------------------------------------

sub print_headers {
    $date = `/bin/date`;
    chop $date;

    if ($HTML) {
	printf ("<HTML>\n<HEAD>\n<TITLE>Untitled</TITLE>\n</HEAD>\n");
	printf ("<!-- $prgname $Version. -->\n");
	printf ("<!-- Formatted to HTML for $USER@$HOST  $date. -->\n");
	printf ("<BODY>\n<H1>Automatic Generated Disassembly</H1>\n<PRE>\n");
    }
}

sub print_footers {
    printf("\n\n\n; Misassembly source v1.3   Generated  %s\n\n", `/bin/date`);

    if ($HTML) {
	printf ("</PRE>\n<P><HR>\n");
	printf ("Formatted to HTML by <EM>recomment v4.01</EM> reassembler.\n");
	printf ("</BODY></HTML>\n");
    }
}


#
# Read the headers, symbols, and comments from document file
# If the Documentfile name is supplied with extension ``.c'',
# C Prerocessor is called to check it out first.
#

sub read_comments {
    if (grep(/\.c$/, $HDRFILE) > 0) {
	open(doc,"$COMPILER $HDRFILE |") ||
	    die "\n$COMPILER: Can't open header file $HDRFILE$Usage";
    } else {
	open(doc,"$HDRFILE") || die "\nCan't open header file $HDRFILE$Usage";
    }

  com: while (<doc>) {
      chop;

      if (/^\s*$/ || /^#/) {			### Empty or C Compiler line
	  next com;
      }

#
# Control
#
#      if (/^;\!\s*FORMAT\s+/) {		### Define Map Fields used
#	  @Fld = split(' ', $_, 99);

#	  printf STDERR"%s\n", $_;
#	  for ($i = 0; ++$i < $#Fld; ) { $Complex{$Fld[$i]} = $Fld[$#Fld]; }

#	  next com;
#     }


#
# Language (CPU Model)
#

      if (/^;\!\s*CPU\s*(\S+)/) {
	  if ($1 == "6502" || $1 == "6510" || $1 == "8500" || $1 == "8502") {
	      $CPUMODEL = 1;
	  }
	  elsif ($1 == "65ce02" || $1 == "65CE02") {
	      $CPUMODEL = 0;
	  }

	  next com;
      }


#
# EQU
#
      if (/^\s*EQU\s*\=\s*(\S+)/) {		### Start address
	  $h = $1;
	  $h =~ y/\$\ //d;
	  $addr = hex($h);			### Use it
	  next com;
      }

      if (/^\s*(\S+)\s*\=\s*(\S+)/) {		### no EQUs between code
	  $h = $2;
	  $h =~ y/\$\ //d;
	  $a = hex($h);

#	  if ($1 ne $EXESYMBOL && $1 ne $DATSYMBOL) {
#	      $SymbolName{$a} = $1;			### store symbol table
#	  }

	  if ($1 eq "&") {
	      $SymbolType{$a} = $LT_DAT;
	  }
	  else {
	      $SymbolType{$a} = $LT_EXE;
	  }


	  if ($verbose) {
	      printf STDERR "define label: %8s = %04X\n", $1, $a;
	  }
	  next com;
      }

      if ($HTML) {				### Protect special characters
	  s/&/&amp;/g;
	  s/</&lt;/g;
	  s/>/&gt;/g;
      }

#
# Comment
#
      if (/^\;/) {
	  if (!$pca) {				### 0 == array base
	      print $_;
	  }
	  else {
	      $remadr{$nr}   = $pca;		### routine explanation
	      $remark{$nr++} = $_;		### save it until addr reached
	  }
	  next com;
      }

#
# Memory Map Entry
#
      else {					### memory map line: title
	  ($ca, $ctext) = /^\s*(\S+)\-*\S*\s*(.+)/;
	  $ca = hex($ca);

	  if ($ca < $pca) {
	      print "; *** ERROR: Descending address: $_ ***\n";
	      next com;
	  }

	  if ($title{$ca}) {
	      print STDERR "Warning: Duplicate header: $_\n";
	      $title{$ca} .= '\\' . $ctext;
	  }
	  else {
	      $title{$ca}   = $ctext;		### save it for use
	  }
	  $pca = $ca;
      }
  }
}
