#!/usr/bin/perl -w

# This script is used to convert information from amber9 prmtop and 
# trajectory files to a format readable by the accent-md tool
# It serves as a frontend to and repeatedly calls ptraj

$version=0.4;

$PI = 3.1415926540;
$DEG2RAD = $PI / 180;

if (! defined ($ARGV[0]) || $ARGV[0] =~ /-h|-help|-info/) {
    print "\tUsage: amber2accent.pl <inputfile> [<outputfile>]\n\tUse -sample for example inputfile\n";
    exit(0);
}
if ($ARGV[0] =~ /-v|-version/) {
    print "\tamber2accent trajectory converter version $version,\n";
    print "\twritten by Thomas Steinbrecher, TSRI, 2007.\n";
    print "\tFor information contact steinbrt\@scripps.edu\n";
    exit(0);
}
if ($ARGV[0] =~ /-sample/) {
    print "#\tA sample input file to amber2accent:\n#\n";
    &print_sample_input();
    exit(0);
}

# Open the input and output files

if (! open (INPUT, $ARGV[0])) {
    print "Enter name of input file to process: ";
    $filename = <STDIN>;
    open (INPUT, $filename) || die "Can't open file\n";
}
if ($ARGV[1]) {
    open OUTPUT,'>', $ARGV[1];
} else {
    open OUTPUT,'>', "$ARGV[0].out";
}

@date=localtime(time());
$date[5]+=1900;
$date[4]++;
print OUTPUT "-" x 79, "\n";
print OUTPUT "Amber2accent version $version\n\n";
printf OUTPUT "%.2i:%.2i, %.2i.%.2i.%4i\n",$date[2],$date[1],$date[3],$date[4],$date[5];
print OUTPUT "-" x 79, "\n";
print OUTPUT "Parameters:\n\n";

# Read the input file and define default parameters

$use_ptraj=0;
$root_defined=0;
$ptraj_exec='ptraj';
$use_pseudobond=0;

while ($line = <INPUT>) {
    chomp ($line);
    if ($line =~/^\#/) {
	# This is just a comment line in the input
    } elsif ($line =~ /^USE_PTRAJ\s+(\d+)/) {
	$use_ptraj=$1;
	if ($use_ptraj==0) {
	    die("Ptraj must be used for now\n");
	} else {
	    print OUTPUT "Ptraj used to calculate BAT values\n";
	}
    } elsif ($line =~ /^RESIDUES\s+(.+)/) {
	print OUTPUT "Solute residues: $1\n";
	@res0 = split (/,/, $1);
	$i=0;
	foreach $j (@res0) {
	    if ($j =~ /(\d+)-(\d+)/) {
		for ($k=$1 ; $k <= $2 ; $k++) {
		    $res[$i++] = $k;
		}
	    } elsif ($j =~ /\d+/) {
		$res[$i++] = $j;
	    } else {
		die("Residue selection $j not understood\n");
	    }
	}
    } elsif ($line =~ /^ROOT_ATOMS\s+(\d+)\s+(\d+)\s+(\d+)/) {
	$root1 = $1;
	$root2 = $2;
	$root3 = $3;
	$root_defined=1;
	print OUTPUT "Root atoms defined as: $root1, $root2, $root3\n";
    } elsif ($line =~ /^PTRAJ_EXEC\s+(.+)/) {
	$ptraj_exec = $1;
	print OUTPUT "ptraj executable set to: $ptraj_exec\n";
    } elsif ($line =~ /^PRMTOP\s+(.+)/) {
	print OUTPUT "Using prmtop file $1\n";
	$prmtop = $1;
    } elsif ($line =~ /^CRD\s+(.+)/) {
	print OUTPUT "Using trajectory file $1\n";
	$trajectory = $1;
    } elsif ($line =~ /^PSEUDO_BOND\s+(.+)\s+(.+)/) {
	$use_pseudobond = 1;
	$pseudo1 = $1;
	$pseudo2 = $2;
	print OUTPUT "Using pseudobond between atom $1 and $2\n";
    }
}

open (PRMTOP, $prmtop) || die ("Can't open prmtop file $prmtop\n");
open (TRAJ, $trajectory) || die "Can't open trajectory file $trajectory\n";

if (@res==0) {die("No residue selected\n");}

if ($root_defined==0) {print OUTPUT "Root atoms will be selected automatically\n";}
print OUTPUT "-" x 79, "\n";

# Read the prmtop file
print OUTPUT "Reading amber prmtop file\n\n";
$line = <PRMTOP>;
if ( ! $line =~ /%VERSION  VERSION_STAMP = .*  DATE = .*/) {
    die ("Error reading input prmtop file\n");
}
while ($line = <PRMTOP>) {
    if ($line =~/FLAG\sRESIDUE_POINTER/) {
	&read_res_pointers();
    }
    if ($line =~/FLAG\sPOINTERS/) {
	&read_final_atom_number();
    }
    if ($line =~/FLAG\sBONDS_INC_HYDROGEN/) {
	&read_bonds();
    }
    if ($line =~/FLAG\sBONDS_WITHOUT_HYDROGEN/) {
	&read_bonds();
	last;
    }
}

&build_connectivity_hashes();

if ($root_defined==0) {
    &autoselect_root_atoms();
}

# Construct the BAT tree
# This defines the molecule in terms of its internal bond angle 
# and torsional degrees of freedom as outlined in Killian et al. 
# J. Chem. Phys. 127, 024107, 2007.

# Put the root atoms into the tree

$tree{$root1} = [-1,-1,-1,'n'];
$tree{$root2} = [$root1,-1,-1,'n'];
$tree{$root3} = [$root2,$root1,-1,'n'];

# Check if root1 is terminal
foreach $tmp ( @{ $connect{$root1} } ) {
    if ( ! defined($tree{$tmp})) {
	die ("Error building tree: Root atom 1 is connected to atom $tmp\n");
    }
}

# Deal with root 2 impropers if necessary
foreach $tmp ( @{ $connect{$root2} } ) {
    if ( ! defined($tree{$tmp})) {
	$tree{$tmp} = [$root2, $root1, $root3, 'i'];
	foreach $tmp2 ( @{ $connect{$tmp} } ) {
	    if ( ! defined($tree{$tmp2} ) ) {
		die ("Error building tree: Atom $tmp is connected to non-root atom $tmp2\nAll atoms connected to Root atom 2 must be terminal or root atoms\n");
	    }
	}
    }
}

# Check that root3 has connections
$i=0;
foreach $tmp ( @{ $connect{$root3} } ) {
    if ( ! defined($tree{$tmp})) {
	$i++;
    }
}
if ($i==0) {
    die ("Error building tree: Root atom 3 is a terminal atom\n");
}
@nodes = ();
push @nodes, $root3;

# Now the magic part:
while ($use_pseudobond >= 0) {
    while ($branch_head = pop @nodes) {
	@all_twigs = @{ $connect{$branch_head} };
	@twigs=();
	foreach $tmp ( @all_twigs ) {
	    if ( ! defined $tree{$tmp} ) {
		push @twigs, $tmp;
	    }
	}
	foreach $tmp ( @twigs ) {
	    $third=$tree{$branch_head}[0];
	    $fourth=$tree{$branch_head}[1];
	    $tree{$tmp} = [$branch_head, $third, $fourth, 'p'];
	}
	push @nodes, @twigs;
    }
    $use_pseudobond--;
    if ($use_pseudobond >=0) {
	push @nodes, $pseudo1;
	push @{ $connect{$pseudo1} }, $pseudo2;
	push @{ $connect{$pseudo2} }, $pseudo1;
    }
}

&print_BAT_tree();

# Calculate BAT values
if ($use_ptraj == 1) {
    @bondfilenames=();
    @anglefilenames=();
    @torsionfilenames=();
    # Write an input file for ptraj and run it
    $numcommands=0;
    $times=1;
    open PTRAJIN, '>', "ptraj${times}.in";
    print PTRAJIN "trajin $trajectory\n";
    foreach $tmp ( sort {$a <=> $b} keys %tree) {
	$numcommands++;
	# Do all the bonds
	if ($tree{$tmp}[0]>0) {
	    printf PTRAJIN "distance bond%.4i \@%-4.i \@%-4.i out bond%.4i \n"
		, $tmp, $tmp, $tree{$tmp}[0], $tmp;
	    push @bondfilenames, sprintf("bond%.4i", $tmp);
	}
	# the angles
	if ($tree{$tmp}[0]>0 && $tree{$tmp}[1]>0) {
	    printf PTRAJIN "angle angle%.4i \@%-4.i \@%-4.i \@%-4.i out angle%.4i \n"
		, $tmp, $tmp, $tree{$tmp}[0], $tree{$tmp}[1], $tmp;
	    push @anglefilenames, sprintf("angle%.4i", $tmp);
	}
	# the dihedrals
	if ($tree{$tmp}[0]>0 && $tree{$tmp}[1]>0 && $tree{$tmp}[2]>0) {
	    printf PTRAJIN "dihedral torsion%.4i \@%-4.i \@%-4.i \@%-4.i \@%-4.i out torsion%.4i \n"
		, $tmp, $tmp, $tree{$tmp}[0], $tree{$tmp}[1], $tree{$tmp}[2], $tmp;
	    push @torsionfilenames, sprintf("torsion%.4i", $tmp);
	}
	# Since ptraj can die on big input, run it separately for every 100 bonds
	if ($numcommands >99) {
	    $numcommands = 0;
	    close PTRAJIN;
	    print OUTPUT "-" x 79, "\n";
	    print OUTPUT "Running ptraj on input file $times\n";
	    print "Running ptraj on input file $times\n";
	    print OUTPUT "PTRAJ output: \n";
	    $ptraj_output = `$ptraj_exec $prmtop ptraj${times}.in 2>&1`;
	    if ( ! defined $ptraj_output) { die ("Error: ptraj produced no output, set PTRAJ_EXEC in the input file or run ptraj by hand\n");}
	    print OUTPUT $ptraj_output;
	    print OUTPUT "-" x 79, "\n";
	    $times++;
	    open PTRAJIN, '>', "ptraj${times}.in";
	    print PTRAJIN "trajin $trajectory\n";
	}
    }
    close PTRAJIN;
    $ptraj_output = `$ptraj_exec $prmtop ptraj${times}.in 2>&1`;
    print OUTPUT "-" x 79, "\n";
    print OUTPUT "PTRAJ output: \n";
    if ( ! defined $ptraj_output) { die ("Error: ptraj produced no output, set PTRAJ_EXEC in the input file or run ptraj by hand\n");}
    print OUTPUT $ptraj_output;
    print OUTPUT "-" x 79, "\n";
} else { 
    # $use_ptraj == 0
    # nothing here yet...
}

# Now reformat the ptraj output files into big lists
# in bondlist[i][j] i gives the number of the bond, 
# j gives the timestep

$i=0;
while ($file = shift @bondfilenames) {
    open FILE, $file;
    $i++;
    $j=0;
    while ($tmp = <FILE>) {
	$j++;
	$tmp =~ /\d+.\d+\s+(\d+.\d+)/;
	$bondlist[$i][$j]=$1;
    }
    close FILE;
}
open BONDLIST, '>', "bonds_complete.out";
for ($k=1 ; $k<=$j ; $k++) {
    printf BONDLIST "%06i\t", $k;
    for ($l=1 ; $l<=$i ; $l++) {
	printf BONDLIST "%2.8f\t", $bondlist[$l][$k];
    }
    print BONDLIST "\n";
}
close BONDLIST;
print OUTPUT "Outputting list of bond distances\n";

$i=0;
while ($file = shift @anglefilenames) {
    open FILE, $file;
    $i++;
    $j=0;
    while ($tmp = <FILE>) {
	$j++;
	$tmp =~ /\d+.\d+\s+(\d+.\d+)/;
	$anglelist[$i][$j]=$1*$DEG2RAD;
    }
    close FILE;
}
open ANGLELIST, '>', "angles_complete.out";
for ($k=1 ; $k<=$j ; $k++) {
    printf ANGLELIST "%06i\t", $k;
    for ($l=1 ; $l<=$i ; $l++) {
	printf ANGLELIST "%2.8f\t", $anglelist[$l][$k];
    }
    print ANGLELIST "\n";
}
close ANGLELIST;
print OUTPUT "Outputting list of angles\n";

$i=0;
while ($file = shift @torsionfilenames) {
    open FILE, $file;
    $i++;
    $j=0;
    while ($tmp = <FILE>) {
	$j++;
	$tmp =~ /\d+.\d+\s+(-?\d+.\d+)/;
	$torsionlist[$i][$j]=$1*$DEG2RAD;
	if ($torsionlist[$i][$j]<0) {
	    $torsionlist[$i][$j] += 2 * $PI;
	}
    }
    close FILE;
}
open TORSIONLIST, '>', "torsions_complete.out";
for ($k=1 ; $k<=$j ; $k++) {
    printf TORSIONLIST "%06i\t", $k;
    for ($l=1 ; $l<=$i ; $l++) {
	printf TORSIONLIST "%2.8f\t", $torsionlist[$l][$k];
    }
    print TORSIONLIST "\n";
}
close TORSIONLIST;
print OUTPUT "Outputting list of torsion values\n";
print OUTPUT "-" x 79, "\n";

# Finalize
print OUTPUT "Execution of amber2accent finished\n";
print OUTPUT "-" x 79, "\n";
close INPUT;
close TRAJ;
close PRMTOP;
close OUTPUT;
exit(0);

#############################################################################################
#############################################################################################
#############################################################################################
# Subroutines

sub read_res_pointers {
    @res_pointer=();
    $line = <PRMTOP>;
    while ($line = <PRMTOP>) {
	if ($line =~ /FLAG/) {
	    last;
	} else {
	    @tmp = split /\s+/, $line;
	    shift @tmp;
	    $num = push (@res_pointer, @tmp);
	}
    }
    print OUTPUT "$num residue pointers read\n";
    @atom=();
    foreach $resnum (@res) {
	$first=$res_pointer[$resnum-1];
	if (defined($res_pointer[$resnum])) {
	    $last=$res_pointer[$resnum];
	} else {
	    $last=$finalatom+1;
	}
	for ($i=$first ; $i < $last ; $i++) {
	    $atom[$i]=1;
	}
    }
    print OUTPUT "Atom map generated\n";
}

sub read_final_atom_number {
    $finalatom = -1;
    $line = <PRMTOP>;
    $line = <PRMTOP>;
    if ($line =~ /^\s+(\d+)\s+/) {
	$finalatom = $1;
	print OUTPUT "Final atom number: $finalatom\n";
    } else {
	print OUTPUT "Could not determine final atom number\n";
    }
}

sub read_bonds {
    $line = <PRMTOP>;
    while ( $line = <PRMTOP>) {
	if ($line =~ /FLAG/) {
	    last;
	} else {
	    @tmp = split /\s+/, $line;
	    shift @tmp;
	    $num = push (@bonds, @tmp);
	}
    }
}

sub build_connectivity_hashes() {
    while (defined($a1 = shift(@bonds))) {
	$a2 = shift @bonds;
	shift @bonds;
	$a1 = ($a1/3)+1;
	$a2 = ($a2/3)+1;
	if (defined ($atom[$a1]) && defined ($atom[$a2])) {
	    push @{ $connect{$a1} }, $a2;
	    push @{ $connect{$a2} }, $a1;
	} elsif ( ! defined ($atom[$a1]) && ! defined ($atom[$a2]) ) {
	    # not used, probably solvent
	} else {
	    die ("Error: residue selection is not a complete molecule, there is a bond between atoms $a1 and $a2\n");
	}
    }
    print OUTPUT "Connectivity table build\n";
}

sub print_connectivity() {
    foreach $tmp ( keys %connect ) {
        printf "Atom %5i is bonded to :", $tmp;
        foreach $tmp2 ( @{ $connect{$tmp} } ) {
	    print "$tmp2 ";
	}
        print "\n";
    }
}

sub print_BAT_tree() {
    print OUTPUT "Bond, Angle, Torsional Tree for residue selection:\n";
    foreach $tmp ( sort { $a <=> $b } keys %tree ) {
	printf OUTPUT "%5i %5i %5i %5i   $tree{$tmp}[3]\n",$tmp, $tree{$tmp}[0],$tree{$tmp}[1],$tree{$tmp}[2];
    }
}

sub autoselect_root_atoms() {
    # Try and use the hydrogen-dihedrals from the prmtop as root atoms
    while ($line = <PRMTOP>) {
	if ($line =~/FLAG\sDIHEDRALS_INC_HYDROGEN/) {
	    $line = <PRMTOP>;
	    while ( $line = <PRMTOP>) {
		if ($line =~ /FLAG/) {
		    last;
		} else {
		    @tmp = split /\s+/, $line;
		    shift @tmp;
		    push (@dihedrals, @tmp);
		}
	    }
	}
    }
    $fail=1;
    while (defined ($dihed0=(shift @dihedrals))) {
	$dihed0=($dihed0/3)+1;
	$dihed1=((shift @dihedrals)/3)+1;
	$dihed2=((shift @dihedrals)/3)+1;
	$dihed3=((shift @dihedrals)/3)+1;
	shift @dihedrals;

	if (defined $atom[$dihed0]) {
	    $fail=0;
	    # Check if dihed0 is terminal
	    foreach $tmp ( @{ $connect{$dihed0} } ) {
		if ($tmp != $dihed1) {
		    $fail = 1;
		}
	    }
	    # Check if dihed1 has only root and terminal connections
	    foreach $tmp ( @{ $connect{$dihed1} } ) {
		if ($tmp != $dihed0 && $tmp != $dihed2) {
		    foreach $tmp2 ( @{ $connect{$tmp} } ) {
			if ( $tmp2 != $dihed1 ) {
			    $fail = 1;
			}
		    }
		}
	    }

	    # Check that root3 has connections
	    $i=0;
	    foreach $tmp ( @{ $connect{$dihed3} } ) {
		if ($tmp != $dihed1) {$i++;}
	    }
	    if ($i==0) { $fail = 1; }
	}
	if ($fail == 0) { 
	    ($root1,$root2,$root3) = ($dihed0,$dihed1,$dihed2);
	    last; 
	}
    }
    if ($fail == 1) {
	die ("Error: Could not autoselect root atoms\n");
    } else {
	print OUTPUT "Atoms $root1, $root2, $root3 selected as Root atoms\n";
    }
}

sub print_sample_input() {
    print << 'EOF'
###################################################################
# Sample amber2accent input file
###################################################################
# The amber prmtop and trajectory file
#
PRMTOP  tol_wat.prm
#
CRD     tol_wat.crd
#
###################################################################
# This determines if ptraj will be used to calculate BAT values
# PTRAJ_EXEC should be the path/name of your ptraj executable
# (default 'ptraj')
#
USE_PTRAJ       1
#
PTRAJ_EXEC      /usr/local/amber9/exe/ptraj
#
###################################################################
# Select which residues make up the solute
# acceptable entries have the form of comma separated
# numbers or number ranges (e.g. 5,7-12,18)
#
RESIDUES        1
#
###################################################################
# The root atoms to construct the connectivity tree, put AUTO
# instead of three numbers to try automatic root atom selection
# If handselected, the root atoms must be connected, root1 must be 
# terminal and root2 must be connected only to root and terminal 
# atoms
#
ROOT_ATOMS       AUTO
#ROOT_ATOMS     2       1       4
#
###################################################################
# If your solute contains two separate molecules, a pseudobond must
# be defined between them. The first atom given must be from the
# molecule containing the root atoms
#
#PSEUDO_BOND    15      16
#
###################################################################
EOF
}
