Bio::SeqIO
locuslink
Toolbar
Summary
Bio::SeqIO::locuslink - DESCRIPTION of Object
Package variables
Privates (from "my" definitions)
%anntype_map = ( SimpleValue => [qw( ALIAS_PROT ALIAS_SYMBOL CDD CHR CURRENT_LOCUSID ECNUM EXTANNOT MAP NC NR OFFICIAL_GENE_NAME OFFICIAL_SYMBOL PHENOTYPE PREFERRED_GENE_NAME PREFERRED_PRODUCT PREFERRED_SYMBOL PRODUCT RELL SUMFUNC ) ], Comment => [qw( SUMMARY ) ], )
%dbname_map = ( pfam => 'Pfam', smart => 'SMART', NM => 'RefSeq', NP => 'RefSeq', XP => 'RefSeq', XM => 'RefSeq', NG => 'RefSeq', XG => 'RefSeq', XR => 'RefSeq', PROT => 'GenBank', ACCNUM => 'GenBank', CONTIG => 'GenBank', OMIM => 'MIM', GRIF => 'GRIF', STS => 'STS', UNIGENE => 'UniGene', )
@locuslink_keys = qw( ACCNUM ALIAS_PROT ALIAS_SYMBOL ASSEMBLY BUTTON CDD CHR COMP CONTIG CURRENT_LOCUSID DB_DESCR DB_LINK ECNUM EVID EXTANNOT GO GRIF LINK LOCUSID LOCUS_CONFIRMED LOCUS_TYPE MAP MAPLINK NC NG NM NP NR OFFICIAL_GENE_NAME OFFICIAL_SYMBOL OMIM ORGANISM PHENOTYPE PHENOTYPE_ID PMID PREFERRED_GENE_NAME PREFERRED_PRODUCT PREFERRED_SYMBOL PRODUCT PROT RELL STATUS STS SUMFUNC SUMMARY TRANSVAR TYPE UNIGENE XG XM XP XR )
%flatten_tags = ( ASSEMBLY => '', ORGANISM => '', OFFICIAL_SYMBOL => '', OFFICIAL_GENE_NAME => '', LOCUSID => '', PMID => '', PREFERRED_SYMBOL => ', ', PREFERRED_GENE_NAME => ', ')
%cddprefix = ( pfam => 'PF', smart => 'SM', )
%alternate_map = ( OFFICIAL_GENE_NAME => 'PREFERRED_GENE_NAME', OFFICIAL_SYMBOL => 'PREFERRED_SYMBOL', )
@ll_firstelements = qw( NM NP NG XG XM XP XR PROT STS ACCNUM CONTIG GRIF )
%feature_pat_map = map { ($_ , "^$_: (.+)\n"); } @locuslink_keys
Included modules
Inherit
Synopsis
# don't instantiate directly - instead do
my $seqio = Bio::SeqIO->new(-format => "locuslink", -file => \STDIN);
Description
This module parses LocusLink into Bio::SeqI objects with rich
annotation, but no sequence.
The input file has to be in the LL_tmpl format - the tabular format
will not work.
The way the current implementation populates the object is rather a
draft work than a finished work of art. Note that at this stage the
locuslink entries cannot be round-tripped, because the parser loses
certain information. For instance, most of the alternative transcript
descriptions are not retained. The parser also misses any element
that deals with visual representation (e.g., 'button') except for the
URLs. Almost all of the pieces of the annotation are kept in the
Bio::Annotation::Collection object.
Methods
_initialize | No description | Code |
add_annotation | No description | Code |
add_annotation_ref | No description | Code |
make_unique | No description | Code |
next_seq | No description | Code |
read_dblink | No description | Code |
read_reference | No description | Code |
read_species | No description | Code |
search_pattern | No description | Code |
show_obj | No description | Code |
Methods description
None available.
Methods code
sub _initialize
{ my($self,@args) = @_;
$self->SUPER::_initialize(@args);
foreach my $key(@ll_firstelements){
$feature_pat_map{$key}="^$key: ([^|]+)";
}
foreach my $key(keys %cddprefix) {
$feature_pat_map{$key}='^CDD: .+\|'.$key.'(\d+)';
}
$feature_pat_map{MAP} = '^MAP: (.+?)\|';
$feature_pat_map{MAPHTML} = '^MAP: .+\|(<.+>)\|';
$feature_pat_map{GO} = '^GO: .+\|.+\|\w+\|(GO:\d+)\|';
$feature_pat_map{GO_DESC} = '^GO: .+\|(.+)\|\w+\|GO:\d+\|';
$feature_pat_map{GO_CAT} = '^GO: (.+)\|.+\|\w+\|GO:\d+\|';
$feature_pat_map{EXTANNOT} = '^EXTANNOT: (.+)\|(.+)\|\w+\|.+\|\d+';
if(! $self->sequence_factory()) {
$self->sequence_factory(Bio::Seq::SeqFactory->new(
-type => 'Bio::Seq::RichSeq'));
}
}
} |
sub add_annotation
{
my ($ac,$type,$text,$anntype)=@_;
my @args;
$anntype = 'SimpleValue' unless $anntype;
SWITCH : {
$anntype eq 'SimpleValue' && do {
push(@args, -value => $text, -tagname => $type);
last SWITCH;
};
$anntype eq 'Comment' && do {
push(@args, -text => $text, -tagname => 'comment');
last SWITCH;
};
}
$ac->add_Annotation("Bio::Annotation::$anntype"->new(@args));
return($ac);
}
} |
sub add_annotation_ref
{
my ($ann,$type,$textref)=@_;
my @text=$textref ? @$textref : ();
foreach my $text(@text){
$ann->add_Annotation($type,Bio::Annotation::SimpleValue->new(-value => $text));
}
return($ann);
}
} |
sub make_unique
{
my ($ann,$key) = @_;
my %seen = ();
foreach my $dbl ($ann->remove_Annotations($key)) {
if(! $seen{$dbl->as_text()}) {
$seen{$dbl->as_text()} = 1;
$ann->add_Annotation($dbl);
}
}
return $ann;
}
} |
sub next_seq
{
my ($self, @args)=@_;
my (@results,$search,$ref,$cddref);
local $/="\n>>";
return unless my $entry = $self->_readline;
if (index($entry,'>>') == 0) { $entry = substr($entry,2);
}
my %record = ();
while($entry && ($entry =~ /\w/)) {
if (!($entry=~/LOCUSID/)){
$self->throw("No LOCUSID in first line of record. ".
"Not LocusLink in my book.");
}
if($entry =~ /^CURRENT_LOCUSID:/m) {
$entry = $self->_readline;
%record = ();
next;
}
foreach my $key (keys %feature_pat_map){
$search=$feature_pat_map{$key};
@results=$self->search_pattern($entry,'FALSE',$search,$search);
$record{$key} = @results ? [@results] : undef;
} last;
}
return unless %record;
my ($PRESENT,@keep);
foreach my $key(keys %cddprefix){
if($record{$key}) {
@keep=();
foreach my $list (@{$record{$key}}) {
push(@keep,$cddprefix{$key}.$list);
}
$record{$key} = [@keep];
}
}
if($record{CDD}) {
@keep=();
foreach my $cdd (@{$record{CDD}}) {
$PRESENT = undef;
foreach my $key (keys %cddprefix) {
if ($cdd=~/$key/){
$PRESENT = 1;
last;
}
}
push(@keep,$cdd) if(! $PRESENT);
}
$record{CDD} = [@keep];
}
my $ann = Bio::Annotation::Collection->new();
foreach my $field(keys %dbname_map){
$ann=read_dblink($ann,$dbname_map{$field},$record{$field});
}
if($record{GO}) {
for(my $j = 0; $j < @{$record{GO}}; $j++) {
my $goann = Bio::Annotation::OntologyTerm->new(
-identifier => $record{GO}->[$j],
-name => $record{GO_DESC}->[$j],
-ontology => $record{GO_CAT}->[$j]);
$ann->add_Annotation($goann);
}
}
$ann=add_annotation_ref($ann,'URL',$record{LINK});
$ann=add_annotation_ref($ann,'URL',$record{DB_LINK});
make_unique($ann,'dblink');
foreach my $anntype (keys %anntype_map) {
foreach my $key (@{$anntype_map{$anntype}}){
if($record{$key}){
foreach (@{$record{$key}}){
$ann=add_annotation($ann,$key,$_,$anntype);
}
}
}
}
foreach my $field (keys %flatten_tags) {
if($record{$field}) {
$record{$field} = join($flatten_tags{$field},
@{$record{$field}});
}
}
$ann=read_reference($ann,'PUBMED',$record{PMID});
if($record{ASSEMBLY}) {
my @assembly=split(/,/,$record{ASSEMBLY});
$ann=read_dblink($ann,'GenBank',\@assembly);
}
foreach my $fieldval (keys %alternate_map){
if((! $record{$fieldval}) && ($record{$alternate_map{$fieldval}})){
$record{$fieldval}=$record{$alternate_map{$fieldval}};
}
}
my $seq = $self->sequence_factory->create(
-verbose => $self->verbose(),
-accession_number => $record{LOCUSID},
-desc => $record{OFFICIAL_GENE_NAME},
-display_id => $record{OFFICIAL_SYMBOL},
-species => read_species($record{ORGANISM}),
-annotation => $ann);
return($seq);
}
} |
sub read_dblink
{
my ($ann,$db,$ref)=@_;
my @results=$ref ? @$ref : ();
foreach my $id(@results){
if($id){
$ann->add_Annotation('dblink',
Bio::Annotation::DBLink->new(
-database =>$db ,
-primary_id =>$id));
}
}
return($ann);
}
} |
sub read_reference
{
my ($ann,$db,$results)=@_;
if($results){
chomp($results);
my @ids=split(/,/,$results);
$ann = read_dblink($ann,$db,\@ids) if @ids;
}
return $ann;
}
} |
sub read_species
{
my ($spline)=@_;
my $species;
my $genus;
($genus,$species)=$spline=~/([^ ]+) ([^ ]+)/;
my $make = Bio::Species->new();
$make->classification( ($species,$genus) );
return $make;
}
} |
sub search_pattern
{
my ($self,
$entry, $searchconfirm, $searchpattern,
$searchtype) = @_;
my @query = $entry=~/$searchpattern/gm;
if ($searchconfirm ne "FALSE"){
$self->warn("No $searchtype found\n$entry\n") unless @query;
foreach (@query){
if (!($_=~/$searchconfirm/)){
$self->throw("error\n$entry\n$searchtype parse $_ does not match $searchconfirm\n");
}
} } return(@query);
}
} |
sub show_obj
{
my ($seqlistref)=@_;
my @list=@$seqlistref;
my $out = Bio::SeqIO->new('-fh' =>\* STDOUT, -format => 'genbank' );
my ($ann,@values,$val);
foreach my $seq(@list){
$out->write_seq($seq);
$ann=$seq->annotation;
foreach my $key ( $ann->get_all_annotation_keys() ) {
@values = $ann->get_Annotations($key);
foreach my $value ( @values ) {
$val=$value->as_text;
print "Annotation ",$key,"\t\t",$val,"\n";
}
}
}
}
1; } |
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/MailList.shtml - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:
bioperl-bugs@bioperl.org
http://bugzilla.bioperl.org/
Email kching at gnf.org
Describe contact details here
Hilmar Lapp, hlapp at gmx.net
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _