Bio::EnsEMBL::Pipeline::Tools
Embl
Toolbar
Package variables
No package variables defined.
Synopsis
No synopsis!
Description
Designed to only read one embl entry @ a time, as they can be big.
Methods
DESTROY{## | No description | Code |
_fh | No description | Code |
_non_EMBL | No description | Code |
_parse_record | No description | Code |
accession | No description | Code |
clean | No description | Code |
comment | No description | Code |
date | No description | Code |
description | No description | Code |
fasta | No description | Code |
feature_table | No description | Code |
keywords | No description | Code |
new | No description | Code |
os | No description | Code |
ox | No description | Code |
parse | No description | Code |
seq_length | No description | Code |
sequence | No description | Code |
sequence_line | No description | Code |
sequence_version | No description | Code |
taxon | No description | Code |
which_database | No description | Code |
Methods description
None available.
Methods code
sub DESTROY{##
{DESTRUCTOR if needed my ($self) = @_;
}
} |
sub _fh
{ my $self = shift;
if(my $fh = shift){
map { delete $self->{$_} } keys(%{$self});
die "Not a filehandle [_fh]" unless ref($fh) eq "GLOB" || ref($fh) eq 'IO::String';
$self->{'_fh'} = $fh;
}
return $self->{'_fh'}; } |
sub _non_EMBL
{ my $self = shift;
$self->{'_non_EMBL'} = shift if @_;
return $self->{'_non_EMBL'} ? 1 : 0; } |
sub _parse_record
{ my ($self,$string) = @_;
$self->clean();
$string =~ s/^SQ\s{3}(.+;)(.+)/$self->sequence($2,$1)/ems;
$string =~ s/^([A-Z]{2})\s{3}(.+\n)/$self->{"_" . $1} .= $2/egm;
if($self->{'_FH'}){
chop $self->{'_FT'};
my @keys = split(/\n\b/, $self->{'_FT'});
delete $self->{'_FT'};
foreach(@keys){
my @qualifiers = split(/\s+\//, $_);
my ($key,$location) = split(/\s+/,shift(@qualifiers));
my $hash;
foreach(@qualifiers){
my ($qualifier, $value) = (split(/=/),'')[0,1];
$value =~ s/^\"|\"$//g;
$value =~ s/(\s?)\n\s+(\B)/$1/g;
push(@{$self->{'_FT'}->{$key}->{$location}->{$qualifier}}, $value);
}
}
}
else{
$self->_non_EMBL(1);
}
} |
sub accession
{ my $self = shift;
$self->{'_AC'} =~ s/[;]//g;
unless(ref($self->{'_AC'}) eq "ARRAY"){
$self->{'_AC'} = [ split(/\s+/, $self->{'_AC'}) ];
}
return $self->{'_AC'}; } |
sub clean
{ my $self = shift;
map { delete $self->{$_} } keys(%{$self}); } |
sub comment
{ my $self = shift;
return $self->{1}->{'_CC'}; } |
sub date
{ my $self = shift;
unless( ref($self->{'_DT'}) eq "ARRAY"){
$self->{'_DT'} = [ split("\n",$self->{'_DT'}) ];
}
return $self->{'_DT'}; } |
sub description
{ my $self = shift;
$self->{'_DE'} =~ s/\n/ /g;
$self->{'_DE'} =~ s/\.\s$//;
return $self->{'_DE'}; } |
sub fasta
{ my $self = shift;
my ($length, $fasta) = (60,undef);
my $seq = $self->sequence();
unless($fasta = [ $seq =~ /\G.{1,$length}/g ]){
$fasta = [ $seq ];
}
return ">" . $self->description() . "\n" . join("\n",@{$fasta}); } |
sub feature_table
{ my $self = shift;
return $self->{'_FT'}; } |
sub keywords
{ my $self = shift;
unless(ref($self->{'_KW'}) eq "ARRAY"){
$self->{'_KW'} =~ s/\n|\.$//g;
$self->{'_KW'} = [ split(/;\s?/, $self->{'_KW'}) ];
}
return $self->{'_KW'}; } |
sub new
{ my ($proto) = shift;
my $class = ref($proto) || $proto;
my $p;
if(ref($_[0]) eq "HASH"){($p) = @_ ;}
else{ $p = {@_} ;}
my $self = {
_debug => 0,
_acc => undef,
_fh => undef,
map { "_".substr($_,1) => $p->{$_} } keys(%$p)
};
bless ($self, $class);
if($self->{'_debug'} > 2){
require 'Data/Dumper.pm';
print Data::Dumper->Dump([$self],['self']);
}
return $self; } |
sub os
{ my $self = shift;
$self->{'_OS'} =~ s/[\.$|\n]//g;
return $self->{'_OS'}; } |
sub ox
{ my $self = shift;
$self->{'_OX'} =~ s/[;\n]//g;
return $self->{'_OX'}; } |
sub parse
{ my ($self,$string) = @_;
if($string){$self->_parse_record($string)}
elsif($self->_fh){
local $/ = "//\n";
my $FH = $self->_fh();
my $single_check = 0;
while(<$FH>){
next unless $single_check < 1;
$self->_parse_record($_);
$single_check++;
}
} } |
sub seq_length
{ my ($self, $obj) = @_;
my $arr = [ split(";", $self->sequence_line) ];
my $first = shift @{$arr};
my $length = $1 if $first =~ /(\d+)/;
return $length; } |
sub sequence
{ my ($self, $sequence, $sequence_line) = @_;
if(@_){
$self->sequence_line($sequence_line);
$self->{'_sequence'} = $sequence;
$self->{'_sequence'} =~ s/[\s0-9\/]//g;
return '';
}
return $self->{'_sequence'}; } |
sub sequence_line
{ my $self = shift;
$self->{'_SQ'} = shift if @_;
return $self->{'_SQ'}; } |
sub sequence_version
{ my $self = shift;
$self->{'_SV'} =~ s/\n//g if $self->{'_SV'};
return $self->{'_SV'}; } |
sub taxon
{ my ($self, $obj) = @_;
my $ft = $self->feature_table();
my $ret;
if(ref($ft) eq "HASH"){
foreach my $location(keys %{$ft->{'source'}}){
foreach(@{$ft->{'source'}->{$location}->{'db_xref'}}){
return $1 if /taxon:(\d+)/;
}
}
}else{
return $1 if $self->ox =~ /NCBI_TaxID=(\d+)/;
}
return undef; } |
which_database | description | prev | next | Top |
sub which_database
{ my ($self, $obj, $ret) = @_;
if($self->_non_EMBL){
$ret = 'SWISSPROT';
my $date = shift( @{$self->date} );
$ret = 'TrEMBL' if $date =~ /TrEMBL/;
}else{
$ret = 'EMBL'; }
return $ret;
}
1; } |
General documentation