#!/usr/bin/env perl

#$Id$

# Copyright  2005, Stowers Institute for Medical Research.  All
# rights reserved.

# c.f. attached LICENSE

=pod

=head1 SYNOPSIS

trexhgt speciestree genetrees

trexhgt --man | --help | --version | --verbose

=head1 OPTIONS

Options are specified on the command line.  Their case is ignored, and
they may be abbreviated to uniqueness (i.e. --h instead of --help).

=over

=item B<--help>

Display command line usage with options.

=item B<--man>

Display complete manual page and exit.

=item B<--verbose>

Provides a trace of processing on STDERR.

=item B<--version> 

Display the scripts version number and exit.

=back

=head1 DESCRIPTION


Automate submission and results retrieval to form
http://www.trex.uqam.ca/index.php?action=hgt&project=trex, for
determination of Horizontal Gene Transfer.

The contents of speciestree will provide the value for the 'species tree' and must be formatted suitably.

The contents of genetrees is taken as one or more 'gene tree' - any
line containing a single integer will be taken to start a new
genetree.  Each of which must be formatted suitably for submission to
the form.

The form will be submitted once for each genetree.

The results of each submission are concatenated on standard output.

=head1 EXAMPLES

=over trexhgt --verbose rem_0708inf_upd.dis POG*.dis

Using file rem_0708inf_upd.dis as a common speciestree, submit all the
genetrees in files matching 'POG8.dis' in the current directory

=item C< trexhgt --man >

print a manpage

=back

=head1 VERSION

$Revision:  0.01$

=head1 AUTHOR

Malcolm Cook (mec@stowers-institute.org)

=head1 DEPENDENCIES

perl

Modules: File::Slurp; WWW::Mechanize; HTML::Strip;

=head1 AVAILABILITY

Email the author for sources.

=cut

use warnings;
use strict;
#use Data::Dumper;

#use vars qw($VERSION $VC_DATE);

#BEGIN {
our $VERSION =  qw$Revision: 0.01 $[-1];
our $VC_DATE =  qw$Date: $[-2];
#}

use Getopt::Long;
use Pod::Usage;
use FindBin;

my $man = 0;
my $help = 0;
my $verbose;
my $version;

GetOptions('help|?'        => \$help,
	   'man!'           => \$man,
	   'verbose!'       => \$verbose,
           'version!'       => \$version,
) or pod2usage(2);

pod2usage(1) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
if ($version) {print "$FindBin::Script: $VERSION\n"; exit(0)};

my $speciestree = shift || pod2usage("missing speciestree");
pod2usage("speciestree $speciestree does not exist") unless -e $speciestree;

#my $genetrees = shift || pod2usage("missing genetrees");
#pod2usage("genetrees $genetrees does not exist") unless -e $genetrees;
#unshift @ARGV, $genetrees;	# 
use File::Slurp;
use WWW::Mechanize;
use HTML::Strip;

my $browser = WWW::Mechanize->new(autocheck => 1,
				  timeout => 1800, # 30 minutes, instead of default 3!
				 );
my $st=read_file($speciestree);

package MyHTMLparser;
use HTML::Parser;
use vars qw(@ISA);
@ISA = qw(HTML::Parser);

my %inside;

sub tag
{
   my($tag, $num) = @_;
   $inside{$tag} += $num;
   #print " ";  # not for all tags
}

sub text {
   my ($self,$text) = @_;
   return if $inside{script} || $inside{style};
   $self->{myText} .= $text;
}

package main;

#my $hs = HTML::Strip->new();
my $hs = MyHTMLparser->new();

while (my $genetree = shift) {
  my $HGTOut = "$genetree.trexhgt";
  if (-e $HGTOut) {
    warn "$genetree already has computed $HGTOut.  Skipping!";
    next;
  }
  print STDERR "trexhgt: processing $genetree\n" if $verbose;
  $browser->get("http://www.trex.uqam.ca/index.php?action=hgt&project=trex") || die "getting";
  #print STDERR "trexhgt: got!\n " if $verbose;
  my $form = $browser->form(0);
  $browser->field("dataInputSpecies" => $st) or die "setting dataInputSpecies: $!";
  my $gt=read_file($genetree) or die "on read_file: $!";
  $browser->field("dataInputGene" =>   $gt) or die "setting dataInputGene: $!";
  $form->action("http://www.trex.uqam.ca/loadData.php");
  #print STDERR "trexhgt: loadData !\n";
  #print STDERR "trexhgt: submitting!\n";
  $browser->submit() or die "on submit: $!";
  #$browser->click() or die "on submit: $!";
  #print STDERR "trexhgt: submitted!\n";

  open (HGT,"> $HGTOut");
  if ( $browser->follow_link( text => 'Results file') ) {
    print HGT $browser->content;
  } else {
    warn "No Results file for $genetree\n ";
    $hs->parse( $browser->content );
    my $clean_text = $hs->{myText};
    $hs->eof;
    #if ($clean_text =~ /Results for HGT Detection(.*)back/mg) {
    if ($clean_text =~ /Results for HGT detection(.*)back/mig) {
      print STDERR $1;
      $clean_text = "ERROR: " . $1;
    }
    print HGT $clean_text;
  }
}
exit 0;
__END__

use HTML::Form;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;

my $ua = LWP::UserAgent->new;
my ($response, $request);
my $form;

$response = $ua->get("http://www.trex.uqam.ca/index.php?action=hgt&project=trex");
$form = HTML::Form->parse($response);
$form->action("http://www.trex.uqam.ca/loadData.php");
$form->value(dataInputSpecies =>  <<"EOF");
4
zAlpha     0 2 3 3
zBeta      2 0 3 3
zGamma     3 3 0 2
zEpsilon   3 3 2 0
EOF

$form->value(dataInputGene =>  <<"EOF");
4
zAlpha     0 3 3 2
zBeta      3 0 2 3
zGamma     3 2 0 3
zEpsilon   2 3 3 0
EOF

$request = $form->click;

$response = $ua->request($request) || die 'while requesting';

my $form2 = HTML::Form->parse($response);
$form2->dump;



exit 0;

__END__






