Bio::EnsEMBL::Pipeline::Tools Embl
Package variablesDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Synopsis
No synopsis!
Description
Designed to only read one embl entry @ a time, as they can be big.
Methods
DESTROY{##
No description
Code
_fh
No description
Code
_non_EMBL
No description
Code
_parse_record
No description
Code
accession
No description
Code
clean
No description
Code
comment
No description
Code
date
No description
Code
description
No description
Code
fasta
No description
Code
feature_table
No description
Code
keywords
No description
Code
new
No description
Code
os
No description
Code
ox
No description
Code
parse
No description
Code
seq_length
No description
Code
sequence
No description
Code
sequence_line
No description
Code
sequence_version
No description
Code
taxon
No description
Code
which_database
No description
Code
Methods description
None available.
Methods code
DESTROY{##descriptionprevnextTop
sub DESTROY{## {
DESTRUCTOR if needed    my ($self) = @_;
    ## Clear/Correct Class Variables
} # +-------------------------------------------------+
# | useful methods |
# | |
# +-------------------------------------------------+
}
_fhdescriptionprevnextTop
sub _fh {
    my $self = shift;
    if(my $fh = shift){
        map { delete $self->{$_} } keys(%{$self});
        die "Not a filehandle [_fh]" unless ref($fh) eq "GLOB" || ref($fh) eq 'IO::String';
        $self->{'_fh'} = $fh;
    }
    return $self->{'_fh'};
}
_non_EMBLdescriptionprevnextTop
sub _non_EMBL {
    my $self = shift;
    $self->{'_non_EMBL'} = shift if @_;
    return $self->{'_non_EMBL'} ? 1 : 0;
}
_parse_recorddescriptionprevnextTop
sub _parse_record {
    my ($self,$string) = @_;
    $self->clean();
    $string =~ s/^SQ\s{3}(.+;)(.+)/$self->sequence($2,$1)/ems;
    $string =~ s/^([A-Z]{2})\s{3}(.+\n)/$self->{"_" . $1} .= $2/egm;
    if($self->{'_FH'}){
        chop $self->{'_FT'};
        my @keys = split(/\n\b/, $self->{'_FT'});
        delete $self->{'_FT'};
        foreach(@keys){
            my @qualifiers = split(/\s+\//, $_);
            my ($key,$location) = split(/\s+/,shift(@qualifiers));
#	        my $hash = { map { ($a, $b) = split('=') } @qualifiers }; # unfortunately not, [non unique $a!] :(
# $self->{'_FT'}->{$key}->{$location} = $hash || {};
my $hash; foreach(@qualifiers){ my ($qualifier, $value) = (split(/=/),'')[0,1]; $value =~ s/^\"|\"$//g; $value =~ s/(\s?)\n\s+(\B)/$1/g; push(@{$self->{'_FT'}->{$key}->{$location}->{$qualifier}}, $value); } } } else{ $self->_non_EMBL(1); } # print "found accession: <" . join(" ", @{$self->accession()}) .">\n";
# print ", sequence version: <". $self->sequence_version() . ">";
# print ", description: <" . $self->description() . ">\n";
# print " * fasta:\n" . $self->fasta() . "\n";
}
accessiondescriptionprevnextTop
sub accession {
    my $self = shift;
    $self->{'_AC'} =~ s/[;]//g;
    unless(ref($self->{'_AC'}) eq "ARRAY"){
        $self->{'_AC'} = [ split(/\s+/, $self->{'_AC'}) ];
    }
    return $self->{'_AC'};
}
cleandescriptionprevnextTop
sub clean {
    my $self = shift;
    map { delete $self->{$_} } keys(%{$self});
}
commentdescriptionprevnextTop
sub comment {
    my $self = shift;
    return $self->{1}->{'_CC'};
}
datedescriptionprevnextTop
sub date {
    my $self = shift;
    unless( ref($self->{'_DT'}) eq "ARRAY"){
        $self->{'_DT'} = [ split("\n",$self->{'_DT'}) ];
    }
    return $self->{'_DT'};
}
descriptiondescriptionprevnextTop
sub description {
    my $self = shift;
    $self->{'_DE'} =~ s/\n/ /g;
    $self->{'_DE'} =~ s/\.\s$//;
    return $self->{'_DE'};
}
fastadescriptionprevnextTop
sub fasta {
    my $self = shift;
    my ($length, $fasta) = (60,undef);
    my $seq = $self->sequence();
    unless($fasta = [ $seq =~ /\G.{1,$length}/g ]){
        $fasta = [ $seq ];
    }
    return ">" . $self->description() . "\n" . join("\n",@{$fasta});
}
feature_tabledescriptionprevnextTop
sub feature_table {
    my $self = shift;
    return $self->{'_FT'};
}
keywordsdescriptionprevnextTop
sub keywords {
    my $self = shift;
    unless(ref($self->{'_KW'}) eq "ARRAY"){
        $self->{'_KW'} =~ s/\n|\.$//g;
        $self->{'_KW'} = [ split(/;\s?/, $self->{'_KW'}) ];
    }
    return $self->{'_KW'};
}
newdescriptionprevnextTop
sub new {
    my ($proto) = shift;
    my $class = ref($proto) || $proto;
    my $p;
    if(ref($_[0]) eq "HASH"){($p) = @_ ;}
    else{ $p = {@_} ;}
    my $self = {
        _debug => 0,
        _acc   => undef,
        _fh    => undef,
        map { "_".substr($_,1) => $p->{$_} } keys(%$p)
    };
    bless ($self, $class);
    if($self->{'_debug'} > 2){
        require 'Data/Dumper.pm';
        print Data::Dumper->Dump([$self],['self']);
    }
    return $self;
}
osdescriptionprevnextTop
sub os {
    my $self = shift;
    $self->{'_OS'} =~ s/[\.$|\n]//g;
    return $self->{'_OS'};
}
oxdescriptionprevnextTop
sub ox {
    # what's OX? a chop'ed OXO??
my $self = shift; $self->{'_OX'} =~ s/[;\n]//g; return $self->{'_OX'};
}
parsedescriptionprevnextTop
sub parse {
    my ($self,$string) = @_;
    if($string){$self->_parse_record($string)}
    elsif($self->_fh){
        local $/ = "//\n";
        my $FH = $self->_fh();
        my $single_check = 0;
        while(<$FH>){
            next unless $single_check < 1;
            $self->_parse_record($_);
            $single_check++;
        }
    }
}
seq_lengthdescriptionprevnextTop
sub seq_length {
    my ($self, $obj) = @_;
    my $arr = [ split(";", $self->sequence_line) ];
    my $first = shift @{$arr};
    my $length = $1 if $first =~ /(\d+)/;
    return $length;
}
sequencedescriptionprevnextTop
sub sequence {
    my ($self, $sequence, $sequence_line) = @_;
    if(@_){
        $self->sequence_line($sequence_line);
        $self->{'_sequence'} = $sequence;
        $self->{'_sequence'} =~ s/[\s0-9\/]//g;
        return '';
    }
    return $self->{'_sequence'};
}
sequence_linedescriptionprevnextTop
sub sequence_line {
    my $self = shift;
    $self->{'_SQ'} = shift if @_;
    return $self->{'_SQ'};
}
sequence_versiondescriptionprevnextTop
sub sequence_version {
    my $self = shift;
    $self->{'_SV'} =~ s/\n//g if $self->{'_SV'};
    return $self->{'_SV'};
}
taxondescriptionprevnextTop
sub taxon {
    my ($self, $obj) = @_;
    my $ft = $self->feature_table();
    my $ret;
    if(ref($ft) eq "HASH"){
        foreach my $location(keys %{$ft->{'source'}}){
            foreach(@{$ft->{'source'}->{$location}->{'db_xref'}}){
                return $1 if /taxon:(\d+)/;
            }
        }
        }else{
        return $1 if $self->ox =~ /NCBI_TaxID=(\d+)/;
    }
    return undef;
}
which_databasedescriptionprevnextTop
sub which_database {
    my ($self, $obj, $ret) = @_;
    if($self->_non_EMBL){
        $ret = 'SWISSPROT';
        my $date = shift( @{$self->date} );
        $ret = 'TrEMBL' if $date =~ /TrEMBL/;
        }else{
        $ret = 'EMBL'; # default assumption
} return $ret; } 1;
}
General documentation
Top