Bio::EnsEMBL::Pipeline
SeqFetcher
Toolbar
Summary
Bio::EnsEMBL::Pipeline::SeqFetcher
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $obj = Bio::EnsEMBL::Pipeline::SeqFetcher->new(
);
$obj->pfetch('/path/to/pfetch');
my $seq = $obj->run_pfetch('z87703');
$obj->getz('/path/to/getz');
my $seq2 = $obj->run_getz('z87703','embl emblnew');
Description
Object to perform various sequence retrieval functions
Methods
Methods description
Title : bp_format Usage : $self->bp_format('Fasta') Function: Get/set for a bioperl index format Returns : String representing format Args : String representing format |
Title : bp_index Usage : $self->bp_index('/usr/local/ensembl/data/bp.inx') Function: Get/set for a bioperl index Returns : path to bioperl index Args : path to bioperl index |
Title : efetch Usage : $self->efetch('/usr/local/ensembl/bin/efetch') Function: Get/set for efetch executable Returns : path to efetch executable Args : path to efetch executable |
Title : getz Usage : $self->getz('/usr/local/ensembl/bin/getz') Function: Get/set for getz executable Returns : path to getz executable Args : path to getz executable |
Title : parse_header Usage : my $newid = $self->parse_header($id); Function: Parses different sequence headers Returns : string Args : string to be parsed |
Title : pfetch Usage : $self->pfetch('/usr/local/ensembl/bin/pfetch') Function: Get/set for pfetch executable Returns : path to pfetch executable Args : path to pfetch executable |
Title : run_bp_search Usage : $self->run_bp_search($id,$inx,$format) Function: Retrieves a sequence from the specified Bioperl index Returns : Bio::Seq, or undef Args : id of sequence to be retrieved, string representing path to bioperl index, string representing index format |
Title : run_efetch Usage : $self->run_efetch($id) Function: Retrieves a sequence using efetch Returns : Bio::Seq, or undef Args : id of sequence to be retrieved |
Title : run_getz Usage : $self->run_getz($id,$libs) Function: Retrieves a sequence using getz from the specified libraries Returns : Bio::Seq, or undef Args : id of sequence to be retrieved, string representing libraries to be searched |
Title : run_pfetch Usage : $self->run_pfetch($id) Function: Retrieves a sequence using pfetch Returns : Bio::Seq, or undef Args : id of sequence to be retrieved |
Methods code
# Get/set accessor for the Bioperl index format name.
#   Args    : optional format string (e.g. 'Fasta')
#   Returns : the format currently stored on the object (undef if never set)
sub bp_format {
    my $self       = shift;
    my $new_format = shift;

    # Only a true value replaces the stored format; anything else is a read.
    $self->{'_format'} = $new_format if $new_format;

    return $self->{'_format'};
}
# Get/set accessor for the path to a Bioperl index file.
#   Args    : optional path string
#   Returns : the path currently stored on the object (undef if never set)
sub bp_index {
    my ($self, $index_path) = @_;

    # A false argument (or none) leaves the stored path untouched.
    if ($index_path) {
        $self->{'_inx'} = $index_path;
    }

    my $stored = $self->{'_inx'};
    return $stored;
}
# Get/set accessor for the efetch executable.
#   Args    : optional path to the efetch executable
#   Returns : the path currently stored on the object (undef if never set)
sub efetch {
    my $self        = shift;
    my $efetch_path = shift;

    # Store only true values; a bare call acts as a getter.
    $self->{'_efetch'} = $efetch_path if $efetch_path;

    return $self->{'_efetch'};
}
# Get/set accessor for the getz executable.
#   Args    : optional path to the getz executable
#   Returns : the path currently stored on the object (undef if never set)
sub getz {
    my ($self, $getz_path) = @_;

    # Update the stored path only when a true value was supplied.
    $self->{'_getz'} = $getz_path if $getz_path;

    return $self->{'_getz'};
}
# Constructor: passes every argument straight to the parent class
# constructor and returns whatever object it builds.
sub new {
    my ($class, @ctor_args) = @_;

    my $self = $class->SUPER::new(@ctor_args);

    return $self;
}
# Reduce a sequence header to a bare identifier.
#   Args    : header/id string to be parsed
#   Returns : the extracted id with all spaces removed
#   Throws  : via $self->throw when no id is supplied
#
# Recognised forms, tried in this order:
#   1. ".../ug=XXX ..."  -> XXX
#   2. "a|b|c"           -> b (or c when b is "UG"), then everything from
#                           the last '.' onwards is dropped
#   3. "xx:REST"         -> REST
# Anything else is returned as given (minus spaces).
sub parse_header {
    my ($self, $id) = @_;

    $self->throw("No id input to parse_header") unless defined($id);

    my $newid = $id;

    if (my ($ug_id) = $id =~ /\/ug=(\S+)\s+/) {
        $newid = $ug_id;
    }
    elsif (my (undef, $second, $third) = $id =~ /^(.*)\|(.*)\|(.*)/) {
        # The first capture is greedy, so with more than two '|' the
        # last two fields are the ones examined here.
        $newid = ($second eq "UG") ? $third : $second;
        $newid =~ s/(.*)\..*/$1/;
    }
    elsif (my ($after_prefix) = $id =~ /^..\:(.*)/) {
        $newid = $after_prefix;
    }

    $newid =~ s/ //g;

    # Trace output retained from the original implementation.
    print STDERR "newid: $newid\n";

    return $newid;
}
# Get/set accessor for the pfetch executable.
#   Args    : optional path to the pfetch executable
#   Returns : the path currently stored on the object (undef if never set)
sub pfetch {
    my $self        = shift;
    my $pfetch_path = shift;

    # Falsy arguments are ignored so the call doubles as a getter.
    if ($pfetch_path) {
        $self->{'_pfetch'} = $pfetch_path;
    }

    return $self->{'_pfetch'};
}
# Fetch a sequence by id from a Bioperl index.
#   Args    : id of the sequence,
#             optional path to the index (defaults to $self->bp_index),
#             optional index format (defaults to $self->bp_format)
#   Returns : whatever the index's fetch() returns for the id, or undef
#   Throws  : via $self->throw when the id is missing, when no index path
#             is available, when the default format is not one of
#             Fasta/EMBL/SwissPfam, or when the index cannot be opened
sub run_bp_search {
    my ($self, $id, $inx, $format) = @_;

    $self->throw("No id input to run_bp_search") unless defined($id);

    # Fall back to the index path configured on the object.
    unless (defined($inx)) {
        $inx = $self->bp_index;
        $self->throw("Cannot run_bp_search without an indexfile")
            unless defined($inx);
    }

    # Fall back to the configured format; only this default is checked
    # against the list of supported formats.
    unless (defined($format)) {
        $format = $self->bp_format;
        my $supported = defined($format)
            && ($format eq 'Fasta' || $format eq 'EMBL' || $format eq 'SwissPfam');
        $self->throw("Cannot run_bp_search without a valid format: Fasta, EMBL or SwissPfam")
            unless $supported;
    }

    # The index class name is derived from the format name.
    my $index_class = 'Bio::Index::' . $format;

    my $index;
    eval {
        $index = $index_class->new($inx);
    };
    if ($@) {
        # Keep the error text before any further call can overwrite $@.
        my $open_error = $@;
        $self->warn("Problem opening the index [$inx] - check you have supplied the right format!");
        $self->throw ("[$open_error]!");
    }

    # A failing fetch is not fatal: it is reported as a warning below.
    my $seq;
    eval {
        $seq = $index->fetch($id);
    };
    $self->warn("Problem with run_bp_search for [$id]") unless defined $seq;

    return $seq;
}
1; } |
# Retrieve a sequence using efetch.
#   Args    : id of the sequence to be retrieved (passed through
#             parse_header first)
#   Returns : Bio::Seq built from the first line of efetch output, or undef
#             when efetch printed nothing or the line contains "not found"
#   Throws  : via $self->throw when no id is given or efetch cannot be run
sub run_efetch {
    my ($self, $id) = @_;

    if (!defined($id)) {
        $self->throw("No id input to run_efetch");
    }

    my $newid  = $self->parse_header($id);
    my $efetch = $self->efetch;
    $efetch = 'efetch' unless defined($efetch);

    # List-form pipe open: the id reaches efetch as a single argument and is
    # never interpreted by a shell, so metacharacters in a header cannot
    # inject commands. The configured command is split on whitespace so that
    # an executable given together with options keeps working.
    my @command = (split(' ', $efetch), '-q', $newid);
    open(my $in, '-|', @command)
        || $self->throw("Error running efetch for id [$newid]: $efetch");

    # Only the first line of output is used as the sequence string.
    my $seqstr = <$in>;
    close $in;

    my $seq;
    if (defined $seqstr) {
        # Drop the line terminator so it does not end up in the sequence.
        chomp($seqstr);
        if ($seqstr !~ /not found/) {
            $seq = Bio::Seq->new('-seq' => $seqstr, '-id' => $newid);
        }
    }

    return $seq;
}
# Retrieve a sequence using getz from the specified libraries.
#   Args    : id of the sequence to be retrieved (passed through
#             parse_header first),
#             string naming the libraries to search (e.g. 'embl emblnew')
#   Returns : first sequence parsed from the getz output, or undef
#   Throws  : via $self->throw when id or libs are missing, when the id
#             cannot be parsed, or when getz cannot be run
sub run_getz {
    my ($self, $id, $libs) = @_;

    if (!defined($id)) {
        $self->throw("No id input to run_getz");
    }
    if (!defined($libs)) {
        $self->throw("No libs input to run_getz");
    }

    my $newid = $self->parse_header($id);
    $self->throw("Could not parse id [$id]") unless defined $newid;

    # Use the executable configured through getz(); plain 'getz' on the
    # PATH remains the fallback.
    my $getz = $self->getz;
    $getz = 'getz' unless defined($getz);

    # Query by ID or accession number using the parsed id.
    my $query = '[libs={' . $libs . '}-ID:' . $newid . '] | [libs-AccNumber:' . $newid . ']';

    # List-form pipe open: the query is handed to getz as one argument with
    # no shell in between, so quotes or metacharacters in the id or library
    # names cannot break out of the command.
    my @command = (split(' ', $getz), '-e', $query);
    open(my $in, '-|', @command)
        || $self->throw("Error running getz for id [$newid]: $getz");

    # Entries are parsed as EMBL, except for the 'mouseprot' library,
    # which is read as Fasta.
    my $format = 'EMBL';
    if ($libs eq 'mouseprot') { $format = 'Fasta'; }

    my $seqio = Bio::SeqIO->new('-fh' => $in, "-format" => $format);
    my $seq   = $seqio->next_seq();
    close $in;

    $self->warn("Problem with getz for [$id]") unless defined $seq;

    return $seq;
}
# Retrieve a sequence using pfetch.
#   Args    : id of the sequence to be retrieved (passed through
#             parse_header first)
#   Returns : Bio::Seq built from the first line of pfetch output, or undef
#             when pfetch printed nothing or answered "no match"
#   Throws  : via $self->throw when no id is given or pfetch cannot be run
sub run_pfetch {
    my ($self, $id) = @_;

    if (!defined($id)) {
        $self->throw("No id input to run_pfetch");
    }

    my $newid  = $self->parse_header($id);
    my $pfetch = $self->pfetch;
    $pfetch = 'pfetch' unless defined($pfetch);

    # List-form pipe open: the id reaches pfetch as a single argument and is
    # never interpreted by a shell, so metacharacters in a header cannot
    # inject commands. The configured command is split on whitespace so that
    # an executable given together with options keeps working.
    my @command = (split(' ', $pfetch), '-q', $newid);
    open(my $in, '-|', @command)
        || $self->throw("Error running pfetch for id [$newid]: $pfetch");

    # Only the first line of output is used as the sequence string.
    my $seqstr = <$in>;
    close $in;

    my $seq;
    if (defined $seqstr) {
        # chomp only once we know a line was read, so empty output does not
        # trigger an operation on an undefined value.
        chomp($seqstr);
        if ($seqstr ne "no match") {
            $seq = Bio::Seq->new('-seq' => $seqstr,
                                 '-id'  => $newid);
        }
    }

    return $seq;
}
General documentation
Describe contact details here
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _