#!/usr/bin/perl -w

use strict;
use Getopt::Long;
use Data::Dumper;
my $out = "Newick-step.txt";
my $in = "ClusterMerging.txt"; 
my $help;
my $count;
my $mode = "merge"; 
my $step = 20;
GetOptions('in=s' => \$in, 'count=i' => \$count, 'step=i' => \$step, 'out=s' => \$out, 'mode=s' => \$mode, 'help' => \$help);

if ($help) {
	&usage();
    exit;
}

my %newick;
my %newick_count;

open(IN, "$in") || die ("Cannot open $in. $!\n");

for (my $i = 0; $i <= $count; $i++) {
	$newick_count{$i} = 1;
}
my @file = <IN>;
if ($mode && $mode =~ /split/) {
	@file = reverse(@file);
}
my ($n1, $n2, $n3) = split(' ', $file[0]);
@file = reverse(@file) if ($n2 < 0 && $n3 < 0);
foreach my $line (@file) {
	($n1, $n2, $n3) = split(' ', $line);
    chop($n1) if (substr($n1, length($n1)-1) eq ":");
    $newick_count{$n1} = $newick_count{$n2}+$newick_count{$n3};
#    print $line;
#    print "$newick_count{$n2}, $newick_count{$n3}\n";
}

#exit;
my $knum = keys %newick_count;

for (my $i = $#file - $step; $i <= $#file; $i++) {
	my $line = $file[$i];
	($n1, $n2, $n3) = split(' ', $line);
    chop($n1) if (substr($n1, length($n1)-1) eq ":");
    $newick{$n2} = $newick_count{$n2} if (not defined $newick{$n2});
    $newick{$n3} = $newick_count{$n3} if (not defined $newick{$n3});
    $newick{$n1} = "($newick{$n2},$newick{$n3})";
    delete $newick{$n2};
    delete $newick{$n3};
    
}

$knum = keys %newick;
print "Merging is not finished. $knum clusters remain.\n"if ($knum > 1);

open(OUT, ">$out") || die ("Cannot open $out. $!\n");
print OUT "(";
my @key = keys %newick;
print OUT $newick{$key[0]};
for (my $i = 1; $i < $knum; $i++) {
	print OUT ",".$newick{$key[$i]};
}
print OUT ");";


sub usage() {
	print<<"USAGE";

Usage: Newick-step.pl -in ClusteringMerging.txt -out Newick-step.txt -mode (split) -step 20 -count countnumber | -help\n";
                      -in      ClusteringMerging.txt or ClusteringSplitting.txt generated by ptraj-clustering. Default: ClusteringMerging.txt
                      -out     The Newick format of the phylogenetic tree. Default: Newick.txt
                      -mode    If it is Hierarchical algorithm, use "-mode split", otherwise, treat as merge.
                      -step    The last n steps. Default value: 20.
                      -count   Please provide the number of total frames in the trajectory.
                      -help    Show this info.

USAGE
}
