#!/usr/bin/env perl #$Id$ # Copyright © 2005, Stowers Institute for Medical Research. All # rights reserved. # c.f. attached LICENSE =pod =head1 SYNOPSIS trexhgt speciestree genetrees trexhgt --man | --help | --version | --verbose =head1 OPTIONS Options are specified on the command line. Their case is ignored, and they may be abbreviated to uniqueness (i.e. --h instead of --help). =over =item B<--help> Display command line usage with options. =item B<--man> Display complete manual page and exit. =item B<--verbose> Provides a trace of processing on STDERR. =item B<--version> Display the scripts version number and exit. =back =head1 DESCRIPTION Automate submission and results retrieval to form http://www.trex.uqam.ca/index.php?action=hgt&project=trex, for determination of Horizontal Gene Transfer. The contents of speciestree will provide the value for the 'species tree' and must be formatted suitably. The contents of genetrees is taken as one or more 'gene tree' - any line containing a single integer will be taken to start a new genetree. Each of which must be formatted suitably for submission to the form. The form will be submitted once for each genetree. The results of each submission are concatenated on standard output. =head1 EXAMPLES =over trexhgt --verbose rem_0708inf_upd.dis POG*.dis Using file rem_0708inf_upd.dis as a common speciestree, submit all the genetrees in files matching 'POG8.dis' in the current directory =item C< trexhgt --man > print a manpage =back =head1 VERSION $Revision: 0.01$ =head1 AUTHOR Malcolm Cook (mec@stowers-institute.org) =head1 DEPENDENCIES perl Modules: File::Slurp; WWW::Mechanize; HTML::Strip; =head1 AVAILABILITY Email the author for sources. =cut use warnings; use strict; #use Data::Dumper; #use vars qw($VERSION $VC_DATE); #BEGIN { our $VERSION = qw$Revision: 0.01 $[-1]; our $VC_DATE = qw$Date: $[-2]; #} use Getopt::Long; use Pod::Usage; use FindBin; my $man = 0; my $help = 0; my $verbose; my $version; GetOptions('help|?' => \$help, 'man!' => \$man, 'verbose!' => \$verbose, 'version!' => \$version, ) or pod2usage(2); pod2usage(1) if $help; pod2usage(-exitstatus => 0, -verbose => 2) if $man; if ($version) {print "$FindBin::Script: $VERSION\n"; exit(0)}; my $speciestree = shift || pod2usage("missing speciestree"); pod2usage("speciestree $speciestree does not exist") unless -e $speciestree; #my $genetrees = shift || pod2usage("missing genetrees"); #pod2usage("genetrees $genetrees does not exist") unless -e $genetrees; #unshift @ARGV, $genetrees; # use File::Slurp; use WWW::Mechanize; use HTML::Strip; my $browser = WWW::Mechanize->new(autocheck => 1, timeout => 1800, # 30 minutes, instead of default 3! ); my $st=read_file($speciestree); package MyHTMLparser; use HTML::Parser; use vars qw(@ISA); @ISA = qw(HTML::Parser); my %inside; sub tag { my($tag, $num) = @_; $inside{$tag} += $num; #print " "; # not for all tags } sub text { my ($self,$text) = @_; return if $inside{script} || $inside{style}; $self->{myText} .= $text; } package main; #my $hs = HTML::Strip->new(); my $hs = MyHTMLparser->new(); while (my $genetree = shift) { my $HGTOut = "$genetree.trexhgt"; if (-e $HGTOut) { warn "$genetree already has computed $HGTOut. Skipping!"; next; } print STDERR "trexhgt: processing $genetree\n" if $verbose; $browser->get("http://www.trex.uqam.ca/index.php?action=hgt&project=trex") || die "getting"; #print STDERR "trexhgt: got!\n " if $verbose; my $form = $browser->form(0); $browser->field("dataInputSpecies" => $st) or die "setting dataInputSpecies: $!"; my $gt=read_file($genetree) or die "on read_file: $!"; $browser->field("dataInputGene" => $gt) or die "setting dataInputGene: $!"; $form->action("http://www.trex.uqam.ca/loadData.php"); #print STDERR "trexhgt: loadData !\n"; #print STDERR "trexhgt: submitting!\n"; $browser->submit() or die "on submit: $!"; #$browser->click() or die "on submit: $!"; #print STDERR "trexhgt: submitted!\n"; open (HGT,"> $HGTOut"); if ( $browser->follow_link( text => 'Results file') ) { print HGT $browser->content; } else { warn "No Results file for $genetree\n "; $hs->parse( $browser->content ); my $clean_text = $hs->{myText}; $hs->eof; #if ($clean_text =~ /Results for HGT Detection(.*)back/mg) { if ($clean_text =~ /Results for HGT detection(.*)back/mig) { print STDERR $1; $clean_text = "ERROR: " . $1; } print HGT $clean_text; } } exit 0; __END__ use HTML::Form; use LWP::UserAgent; use HTTP::Request; use HTTP::Response; my $ua = LWP::UserAgent->new; my ($response, $request); my $form; $response = $ua->get("http://www.trex.uqam.ca/index.php?action=hgt&project=trex"); $form = HTML::Form->parse($response); $form->action("http://www.trex.uqam.ca/loadData.php"); $form->value(dataInputSpecies => <<"EOF"); 4 zAlpha 0 2 3 3 zBeta 2 0 3 3 zGamma 3 3 0 2 zEpsilon 3 3 2 0 EOF $form->value(dataInputGene => <<"EOF"); 4 zAlpha 0 3 3 2 zBeta 3 0 2 3 zGamma 3 2 0 3 zEpsilon 2 3 3 0 EOF $request = $form->click; $response = $ua->request($request) || die 'while requesting'; my $form2 = HTML::Form->parse($response); $form2->dump; exit 0; __END__