Bio::EnsEMBL::Pipeline::SeqFetcher
xdget
Toolbar
Summary
Bio::EnsEMBL::Pipeline::SeqFetcher::xdget
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $obj = Bio::EnsEMBL::Pipeline::SeqFetcher::xdget->new(
-executable => '/blah/xdget',
-db => ['/data/db']
);
my $seq = $obj->get_Seq_by_acc($acc);
Description
Object to retrieve sequences using xdget (Wash U).
Database must be formatted with xdformat. Sequence type (protein
or nucleotide) is guessed, based on file extensions of the database
files. Returns sequence as Bio::Seq.
Additional options for xdget can be specified, though no checking
is performed for compatibility.
Note that, at the time of writing, xdget is case-insensitive:
retrieved sequence is in upper case, irrespective of what was
in the original unformatted fasta file.
Methods
Methods description
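Title : db Usage : $self->db(['/path/to/db']); Function: Get/set for the list of XDF databases to be searched by the module; the molecule type of each database is guessed from its file extensions. Returns : reference to an array of database paths Args : reference to an array of database paths |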
Title : executable Usage : $self->executable('/path/to/executable'); Function: Get/set for the executable to be used by the module. Returns : string Args : string |
Title : get_Seq_by_acc Usage : $self->get_Seq_by_acc($accession); Function: retrieves sequence via xdget Returns : Bio::Seq Args : Sequence identifier string |
Title : options Usage : $self->options('-r'); # '-r' makes xdget return the reverse complement of a nucleotide sequence Function: Get/set for options to xdget Returns : string Args : string |
Methods code
# Title   : _moltype
# Usage   : $self->_moltype($db);        # look up
#           $self->_moltype($db, 'p');   # record
# Function: Internal get/set for the molecule type stored against a
#           database path in $self->{'_moltype'}.
# Returns : the stored type for $db (undef if none has been recorded, or
#           if no database path was given)
# Args    : database path; optionally the type to record for it
sub _moltype {
    my $self    = shift;
    my $db_path = shift;
    my $moltype = shift;

    # Nothing to look up or store without a database key.
    return undef unless $db_path;

    # Only a true value replaces what is already recorded.
    $self->{'_moltype'}{$db_path} = $moltype if $moltype;

    return $self->{'_moltype'}{$db_path};
}
1; } |
# Title   : db
# Usage   : $self->db(['/path/to/db1', '/path/to/db2']);
#           $self->db('/path/to/db');
# Function: Get/set for the XDF databases to be searched. Each database
#           passed in is checked on disk and its molecule type is recorded
#           via _moltype(): 'p' if a .xp? file exists, otherwise 'n' if a
#           .xn? file exists. Databases are appended to any already
#           registered.
# Returns : reference to the array of registered database paths (undef if
#           none has been registered)
# Args    : optional - a reference to an array of database paths, or a
#           single database path as a plain string
# Throws  : if neither protein nor nucleotide XDF files are found for a
#           database
sub db {
    my ($self, $dbref) = @_;

    if ($dbref) {
        # Accept a bare path as well as an array ref: dereferencing a plain
        # string as an array is fatal under strict refs.
        my @paths = ref($dbref) eq 'ARRAY' ? @{$dbref} : ($dbref);

        foreach my $db (@paths) {
            my @files = glob "$db.x??";

            # Anchor on the file extension so that a directory component
            # such as "v1.xpress/" cannot be mistaken for an XDF file.
            if (grep { /\.xp.\z/ } @files) {
                $self->_moltype($db, 'p');
            }
            elsif (grep { /\.xn.\z/ } @files) {
                $self->_moltype($db, 'n');
            }
            else {
                throw("XDF database $db appears to be missing files");
            }

            push @{ $self->{'_db'} }, $db;
        }
    }

    return $self->{'_db'};
}
# Title   : executable
# Usage   : $self->executable('/path/to/executable');
# Function: Get/set for the executable stored in $self->{'_exe'}.
# Returns : the stored value
# Args    : optional - new value; ignored when false
sub executable {
    my $self = shift;
    my $path = shift;

    $self->{'_exe'} = $path if $path;

    return $self->{'_exe'};
}
# Title   : get_Seq_by_acc
# Usage   : my $seq = $self->get_Seq_by_acc($accession);
# Function: Runs xdget against each registered database in turn and builds
#           a sequence object from the first one that returns an entry.
#           The first output line (minus the leading '>') becomes the
#           description; the remaining lines, with all whitespace removed,
#           become the sequence.
# Returns : Bio::Seq, or nothing (undef in scalar context) when no
#           database returned an entry for the accession
# Args    : sequence identifier string
# Throws  : if no accession is given, no database is defined, or the xdget
#           pipe cannot be opened
sub get_Seq_by_acc {
    my ($self, $acc) = @_;

    throw("No accession input")  unless $acc;
    throw("No database defined") unless $self->db;

    my $xdget = $self->executable;

    # The command line is run by the shell (needed for the 2>&1 redirect),
    # so single-quote the accession: identifiers such as "gi|123|" would
    # otherwise be interpreted as shell syntax.
    (my $shell_acc = $acc) =~ s/'/'\\''/g;
    $shell_acc = "'$shell_acc'";

    my @out;
    my $found = 0;

  DB:
    foreach my $db (@{ $self->db }) {
        # Select the xdget mode from the molecule type recorded by db().
        my $options = $self->options;
        $options .= ($self->_moltype($db) eq 'n') ? " -n" : " -p";

        my $command = "$xdget $options $db $shell_acc";
        open(my $fh, '-|', "$command 2>&1")
            or throw("Error retrieving $acc from $db with $xdget");
        @out = <$fh>;
        # The result of close is deliberately not checked: the outcome is
        # judged from the captured output below.
        close $fh;

        # Treat empty output like "Not found" and try the next database.
        if (@out && $out[0] !~ /Not found/) {
            $found = 1;
            last DB;
        }
    }

    # Do not build a sequence object from an error message.
    return unless $found;

    my $desc = shift @out;
    $desc =~ s/^>//;
    chomp $desc;

    my $seqstr = join(" ", @out);
    $seqstr =~ s/\s//g;

    return Bio::Seq->new(
        -seq              => $seqstr,
        -display_id       => $acc,
        -accession_number => $acc,
        -desc             => $desc
    );
}
# Title   : new
# Usage   : my $obj = Bio::EnsEMBL::Pipeline::SeqFetcher::xdget->new(
#               -executable => '/blah/xdget',
#               -options    => '-r',
#               -db         => $db,
#           );
# Function: Constructor. Stores the executable (defaulting to 'xdget'),
#           and passes on the options and database argument when they are
#           defined.
# Returns : the new object
# Args    : named arguments -EXECUTABLE, -OPTIONS and -DB, all optional
sub new {
    my $class = shift;

    my ($exe, $options, $db) = rearrange([qw(EXECUTABLE OPTIONS DB)], @_);

    my $self = bless {}, $class;

    # Fall back to whichever xdget is on the PATH.
    $self->executable($exe || 'xdget');

    if (defined $options) {
        $self->options($options);
    }
    if (defined $db) {
        $self->db($db);
    }

    return $self;
}
# Title   : options
# Usage   : $self->options('-r');
# Function: Get/set for the option string stored in $self->{'_options'}.
# Returns : the stored value
# Args    : optional - new value; ignored when false
sub options {
    my $self     = shift;
    my $new_opts = shift;

    if ($new_opts) {
        $self->{'_options'} = $new_opts;
    }

    return $self->{'_options'};
}
General documentation
ensembl-dev@ebi.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _