Bio::EnsEMBL::Pipeline SeqFetcher
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Pipeline::SeqFetcher
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Root
Bio::Index::EMBL
Bio::Index::Fasta
Bio::Index::SwissPfam
Bio::Seq
Bio::SeqIO
Inherit
Bio::EnsEMBL::Root
Synopsis
    my $obj = Bio::EnsEMBL::Pipeline::SeqFetcher->new(
);
$obj->pfetch('/path/to/pfetch');
my $seq = $obj->run_pfetch('z87703');
$obj->getz('/path/to/getz');
my $seq2 = $obj->run_getz('z87703','embl emblnew');
Description
Object to perform various sequence retrieval functions
Methods
bp_formatDescriptionCode
bp_indexDescriptionCode
efetchDescriptionCode
getzDescriptionCode
new
No description
Code
parse_headerDescriptionCode
pfetchDescriptionCode
run_bp_searchDescriptionCode
run_efetchDescriptionCode
run_getzDescriptionCode
run_pfetchDescriptionCode
Methods description
bp_formatcode    nextTop
    Title   :   bp_format
Usage : $self->bp_format('Fasta')
Function: Get/set for a bioperl index format
Returns : String representing format
Args : String representing format
bp_indexcodeprevnextTop
    Title   :   bp_index
Usage : $self->bp_index('/usr/local/ensembl/data/bp.inx')
Function: Get/set for a bioperl index
Returns : path to bioperl index
Args : path to bioperl index
efetchcodeprevnextTop
    Title   :   efetch
Usage : $self->efetch('/usr/local/ensembl/bin/efetch')
Function: Get/set for efetch executable
Returns : path to efetch executable (undef if never set)
Args : path to efetch executable
getzcodeprevnextTop
    Title   :   getz
Usage : $self->getz('/usr/local/ensembl/bin/getz')
Function: Get/set for getz executable
Returns : path to getz executable (undef if never set)
Args : path to getz executable
parse_headercodeprevnextTop
  Title   : parse_header
Usage : my $newid = $self->parse_header($id);
Function: Parses different sequence headers
Returns : string
Args : string to be parsed
pfetchcodeprevnextTop
    Title   :   pfetch
Usage : $self->pfetch('/usr/local/ensembl/bin/pfetch')
Function: Get/set for pfetch executable
Returns : path to pfetch executable (undef if never set)
Args : path to pfetch executable
run_bp_searchcodeprevnextTop
    Title   :   run_bp_search
Usage : $self->run_bp_search($id,$inx,$format)
Function: Retrieves a sequence from the specified Bioperl index
Returns : Bio::Seq, or undef
Args : id of sequence to be retrieved,
string representing path to bioperl index,
string representing index format
run_efetchcodeprevnextTop
    Title   :   run_efetch
Usage : $self->run_efetch($id)
Function: Retrieves a sequence using efetch
Returns : Bio::Seq, or undef
Args : id of sequence to be retrieved
run_getzcodeprevnextTop
    Title   :   run_getz
Usage : $self->run_getz($id,$libs)
Function: Retrieves a sequence using getz from the specified libraries
Returns : Bio::Seq, or undef
Args : id of sequence to be retrieved, string representing libraries to be searched
run_pfetchcodeprevnextTop
    Title   :   run_pfetch
Usage : $self->run_pfetch($id)
Function: Retrieves a sequence using pfetch
Returns : Bio::Seq, or undef
Args : id of sequence to be retrieved
Methods code
bp_formatdescriptionprevnextTop
# Get/set accessor for the bioperl index format.
#   Args    : (optional) string naming the format, e.g. 'Fasta'
#   Returns : the format string currently stored under '_format'
# A false argument (undef, '' or 0) is treated as "nothing passed" and
# leaves the stored value untouched.
sub bp_format {
    my $self     = shift;
    my ($format) = @_;

    $self->{'_format'} = $format if $format;

    return $self->{'_format'};
}
bp_indexdescriptionprevnextTop
# Get/set accessor for the path to a bioperl index.
#   Args    : (optional) path to the bioperl index
#   Returns : the path currently stored under '_inx'
# A false argument (undef, '' or 0) is treated as "nothing passed" and
# leaves the stored value untouched.
sub bp_index {
    my $self  = shift;
    my ($inx) = @_;

    $self->{'_inx'} = $inx if $inx;

    return $self->{'_inx'};
}
efetchdescriptionprevnextTop
# Get/set accessor for the efetch executable.
#   Args    : (optional) path to the efetch executable
#   Returns : the path currently stored under '_efetch'
# A false argument (undef, '' or 0) is treated as "nothing passed" and
# leaves the stored value untouched.
sub efetch {
    my $self     = shift;
    my ($efetch) = @_;

    $self->{'_efetch'} = $efetch if $efetch;

    return $self->{'_efetch'};
}
getzdescriptionprevnextTop
# Get/set accessor for the getz executable.
#   Args    : (optional) path to the getz executable
#   Returns : the path currently stored under '_getz'
# A false argument (undef, '' or 0) is treated as "nothing passed" and
# leaves the stored value untouched.
sub getz {
    my $self   = shift;
    my ($getz) = @_;

    $self->{'_getz'} = $getz if $getz;

    return $self->{'_getz'};
}
newdescriptionprevnextTop
# Constructor: hands every argument straight to the parent class
# constructor and returns whatever object that produces.
sub new {
  my $class = shift;

  my $instance = $class->SUPER::new(@_);

  return $instance;
}
parse_headerdescriptionprevnextTop
# Reduce a sequence header to the identifier used for fetching.
#   Args    : header / id string to be parsed
#   Returns : the extracted identifier with all spaces removed
#   Throws  : if no id is supplied
# Recognised header shapes, tried in this order:
#   * ".../ug=NAME ..."  -> NAME
#   * "a|b|c"            -> c when b is "UG", otherwise b; anything from
#                           the last '.' onwards is then dropped
#   * "xx:NAME"          -> NAME (any two-character prefix before ':')
# Any other string is used unchanged. The chosen id is also written to
# STDERR.
sub parse_header {
    my $self = shift;
    my ($id) = @_;

    $self->throw("No id input to parse_header") unless defined($id);

    my $newid = $id;

    if ($id =~ /\/ug=(\S+)\s+/) {
        $newid = $1;
    }
    elsif (my @field = $id =~ /^(.*)\|(.*)\|(.*)/) {
        # the greedy first group means these are the last two '|' fields
        $newid = ($field[1] eq "UG") ? $field[2] : $field[1];
        $newid =~ s/(.*)\..*/$1/;
    }
    elsif ($id =~ /^..\:(.*)/) {
        $newid = $1;
    }

    $newid =~ s/ //g;

    print STDERR "newid: $newid\n";

    return $newid;
}
pfetchdescriptionprevnextTop
# Get/set accessor for the pfetch executable.
#   Args    : (optional) path to the pfetch executable
#   Returns : the path currently stored under '_pfetch'
# A false argument (undef, '' or 0) is treated as "nothing passed" and
# leaves the stored value untouched.
sub pfetch {
    my $self     = shift;
    my ($pfetch) = @_;

    $self->{'_pfetch'} = $pfetch if $pfetch;

    return $self->{'_pfetch'};
}
run_bp_searchdescriptionprevnextTop
# Retrieve a sequence from a Bioperl index.
#   Args    : id of the sequence to be retrieved,
#             path to the bioperl index (defaults to $self->bp_index),
#             index format (defaults to $self->bp_format); must be one of
#             'Fasta', 'EMBL' or 'SwissPfam'
#   Returns : whatever the index's fetch() yields for the id, or undef
#             (with a warning) when nothing could be fetched
#   Throws  : if no id is given, if no index file is available, if the
#             format is not one of the supported ones, or if the index
#             cannot be opened
sub run_bp_search {
  my ($self, $id, $inx, $format) = @_;

  if (!defined($id)) {
    $self->throw("No id input to run_bp_search");
  }

  if (!defined($inx)) {
    $inx = $self->bp_index;
    if (!defined($inx)) {
      $self->throw("Cannot run_bp_search without an indexfile");
    }
  }

  if (!defined($format)) {
    $format = $self->bp_format;
  }

  # The format is spliced into a class name below, so it is checked
  # whether it was passed in explicitly or came from bp_format.
  my %is_supported = map { $_ => 1 } qw(Fasta EMBL SwissPfam);
  if (!defined($format) || !$is_supported{$format}) {
    $self->throw("Cannot run_bp_search without a valid format: Fasta, EMBL or SwissPfam");
  }

  my $type = 'Bio::Index::' . $format;
  my $index;
  eval {
    $index = $type->new($inx);
  };

  if ($@) {
    my $tmp = $@; # keep a copy: warn may empty out $@
    $self->warn("Problem opening the index [$inx] - check you have supplied the right format!");
    $self->throw("[$tmp]!");
  }

  # get the sequence; a failing fetch is downgraded to a warning
  my $seq;
  eval {
    $seq = $index->fetch($id);
  };
  $self->warn("Problem with run_bp_search for [$id]") unless defined $seq;

  return $seq;
}

# true value that followed this sub in the original listing
1;
run_efetchdescriptionprevnextTop
# Retrieve a sequence using efetch.
#   Args    : id of the sequence to be retrieved (run through parse_header)
#   Returns : a Bio::Seq built from the first line efetch prints, or undef
#             when efetch prints nothing or the line contains "not found"
#   Throws  : if no id is given or efetch cannot be started
sub run_efetch {
  my ($self, $id) = @_;

  if (!defined($id)) {
    $self->throw("No id input to run_efetch");
  }

  my $newid  = $self->parse_header($id);
  my $efetch = $self->efetch;

  # if efetch path not explicitly set, assume it's in $PATH
  $efetch = 'efetch' unless defined($efetch);

  # List-form pipe open: the id is handed to efetch as a single argument
  # and never passes through a shell, so characters such as '|' or ';'
  # in an id cannot be interpreted as shell syntax.
  open(my $fh, '-|', $efetch, '-q', $newid)
    or $self->throw("Error running efetch for id [$newid]: $efetch");
  my $seqstr = <$fh>;
  close $fh;

  my $seq;
  if (defined $seqstr) {
    # drop the line terminator so it does not end up in the sequence
    chomp($seqstr);
    if ($seqstr !~ "not found") {
      $seq = Bio::Seq->new('-seq' => $seqstr, '-id' => $newid);
    }
  }

  return $seq;
}
run_getzdescriptionprevnextTop
# Retrieve a sequence using getz from the specified libraries.
#   Args    : id of the sequence to be retrieved,
#             string naming the libraries to be searched
#   Returns : the first entry Bio::SeqIO reads from getz's output, or
#             undef (with a warning) when nothing could be read
#   Throws  : if id or libs is missing, if the id cannot be parsed, or if
#             getz cannot be started
sub run_getz {
  my ($self, $id, $libs) = @_;

  if (!defined($id)) {
    $self->throw("No id input to run_getz");
  }

  if (!defined($libs)) {
    $self->throw("No libs input to run_getz");
  }

  my $newid = $self->parse_header($id);
  $self->throw("Could not parse id [$id]") unless defined $newid;
  my $getz = $self->getz;

  # if getz path not explicitly set, assume it's in $PATH
  $getz = 'getz' unless defined($getz);

  # NOTE(review): the query is built from the raw $id, exactly as before,
  # not from the parsed $newid - confirm which one getz should receive.
  my $query = sprintf('[libs={%s}-ID:%s] | [libs-AccNumber:%s]', $libs, $id, $id);

  # Run the configured executable (the path from $self->getz is honoured)
  # with the query as a single argument; no shell is involved, so quotes
  # or other metacharacters in the id or library names stay literal.
  open(my $fh, '-|', $getz, '-e', $query)
    or $self->throw("Error running getz for id [$newid]: $getz");

  # hack just for rikens
  my $format = 'EMBL';
  if ($libs eq 'mouseprot') {
    $format = 'Fasta';
  }

  my $seqio = Bio::SeqIO->new('-fh' => $fh, "-format" => $format);
  my $seq   = $seqio->next_seq();
  close $fh;

  $self->warn("Problem with getz for [$id]") unless defined $seq;

  return $seq;
}
run_pfetchdescriptionprevnextTop
# Retrieve a sequence using pfetch.
#   Args    : id of the sequence to be retrieved (run through parse_header)
#   Returns : a Bio::Seq built from the first line pfetch prints, or undef
#             when pfetch prints nothing or that line is "no match"
#   Throws  : if no id is given or pfetch cannot be started
sub run_pfetch {
  my ($self, $id) = @_;

  if (!defined($id)) {
    $self->throw("No id input to run_pfetch");
  }

  my $newid  = $self->parse_header($id);
  my $pfetch = $self->pfetch;

  # if pfetch path not explicitly set, assume it's in $PATH
  $pfetch = 'pfetch' unless defined($pfetch);

  # List-form pipe open: the id is handed to pfetch as a single argument
  # and never passes through a shell, so characters such as '|' or ';'
  # in an id cannot be interpreted as shell syntax.
  open(my $fh, '-|', $pfetch, '-q', $newid)
    or $self->throw("Error running pfetch for id [$newid]: $pfetch");
  my $seqstr = <$fh>;
  close $fh;

  my $seq;
  if (defined $seqstr) {
    # chomp only once we know a line was read; pfetch may print nothing
    chomp($seqstr);
    if ($seqstr ne "no match") {
      $seq = Bio::Seq->new('-seq' => $seqstr, '-id' => $newid);
    }
  }

  return $seq;
}
General documentation
CONTACTTop
Describe contact details here
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _