Bio::DB::GFF::Adaptor biofetch
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::DB::GFF::Adaptor::biofetch -- Cache BioFetch objects in a Bio::DB::GFF database
Package variables
No package variables defined.
Included modules
Bio::DB::BioFetch
Bio::DB::GFF::Adaptor::dbi::mysql
Bio::DB::GFF::Util::Rearrange
Bio::SeqIO
Inherit
Bio::DB::GFF::Adaptor::dbi::mysql
Synopsis
Proof of principle. Not for production use.
Description
This adaptor is a proof-of-principle. It is used to fetch BioFetch
sequences into a Bio::DB::GFF database (currently uses a hard-coded
EMBL database) as needed. This allows the Generic Genome Browser to
be used as a Genbank/EMBL browser.
Methods
_load_embl
No description
Code
get_attributes
No description
Code
guess_name
No description
Code
load_from_embl
No description
Code
load_from_file
No description
Code
newDescriptionCode
refclass
No description
Code
segment
No description
Code
Methods description
newcode    nextTop
 Title   : new
Usage : $db = Bio::DB::GFF->new(-adaptor=>'biofetch',@args)
Function: create a new adaptor
Returns : a Bio::DB::GFF object
Args : see below
Status : Public
This is the constructor for the adaptor. It is called automatically
by Bio::DB::GFF->new. In addition to arguments that are common among
all adaptors, the following class-specific arguments are recgonized:
  Argument       Description
-------- -----------
-dsn the DBI data source, e.g. 'dbi:mysql:ens0040' -user username for authentication -pass the password for authentication -proxy [['http','ftp'],'http://proxy:8080']
-dsn,-user and -pass indicate the local database to cache results in,
and as are per Bio::DB::GFF::Adaptor::dbi. The -proxy argument allows
you to set the biofetch web proxy, and uses the same syntax described
for the proxy() method of Bio::DB::WebDBSeqI, except that the
argument must be passed as an array reference.
Methods code
_load_embldescriptionprevnextTop
sub _load_embl {
  my $self = shift;
  my $acc  = shift;
  my $seq  = shift;
  my $refclass = $self->refclass;
  my $locus    = $seq->id;

  # begin loading
$self->setup_load(); # first synthesize the entry for the top-level feature
my @aliases; foreach ($seq->accession,$seq->get_secondary_accessions) { next if lc($_) eq lc($acc); push @aliases,[Alias => $_]; } $self->load_gff_line( { ref => $acc, class => $refclass, source => 'EMBL', method => 'origin', start => 1, stop => $seq->length, score => undef, strand => '.', phase => '.', gclass => $self->refclass, gname => $acc, tstart => undef, tstop => undef, attributes => [[Note => $seq->desc],@aliases], } ); # now load each feature in turn
for my $feat ($seq->all_SeqFeatures) { my $attributes = $self->get_attributes($feat); my $name = $self->guess_name($attributes); my $location = $feat->location; my @segments = map {[$_->start,$_->end,$_->seq_id]} $location->can('sub_Location') ? $location->sub_Location : $location; my $type = $feat->primary_tag eq 'CDS' ? 'mRNA' : $feat->primary_tag; my $parttype = $feat->primary_tag eq 'gene' ? 'exon' : $feat->primary_tag; if ($feat->primary_tag =~ /^(gene|CDS)$/) { $self->load_gff_line( { ref => $acc, class => $refclass, source => 'EMBL', method => $type, start => $location->start, stop => $location->end, score => $feat->score || undef, strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'), phase => $feat->frame || '.', gclass => $name->[0], gname => $name->[1], tstart => undef, tstop => undef, attributes => $attributes, } ); @$attributes = (); } for my $segment (@segments) { $self->load_gff_line( { ref => $segment->[2] eq $locus ? $acc : $segment->[2], class => $refclass, source => 'EMBL', method => $parttype, start => $segment->[0], stop => $segment->[1], score => $feat->score || undef, strand => $feat->strand > 0 ? '+' : ($feat->strand < 0 ? '-' : '.'), phase => $feat->frame || '.', gclass => $name->[0], gname => $name->[1], tstart => undef, tstop => undef, attributes => $attributes, } ); } } # finish loading
$self->finish_load(); # now load the DNA
$self->load_sequence_string($acc,$seq->seq); 1;
}
get_attributesdescriptionprevnextTop
sub get_attributes {
  my $self = shift;
  my $seq  = shift;

  my @tags = $seq->all_tags or return;
  my @result;
  foreach my $tag (@tags) {
    foreach my $value ($seq->each_tag_value($tag)) {
      push @result,[$tag=>$value];
    }
  }\@
  result;
}
guess_namedescriptionprevnextTop
sub guess_name {
  my $self = shift;
  my $attributes = shift;
  my @ordered_attributes = sort {($preferred_tags{$a->[0]} || 0) <=> ($preferred_tags{$b->[0]} || 0)} @$attributes;
  my $best = pop @ordered_attributes;
  @$attributes = @ordered_attributes;
  return $best;
}



1;
}
load_from_embldescriptionprevnextTop
sub load_from_embl {
  my $self = shift;
  my $db   = shift;
  my $acc  = shift or $self->throw('Must provide an accession ID');

  my $biofetch;
  if ($self->{_biofetch}{$db}) {
    $biofetch = $self->{_biofetch}{$db};
  } else {
    $biofetch = $self->{_biofetch}{$db} = Bio::DB::BioFetch->new(-db=>$db);
    $biofetch->retrieval_type('tempfile');
    $biofetch->proxy(@{$self->{_proxy}}) if $self->{_proxy};
  }

  my $seq  = eval {$biofetch->get_Seq_by_id($acc)} or return;
  $self->_load_embl($acc,$seq);
  1;
}
load_from_filedescriptionprevnextTop
sub load_from_file {
  my $self = shift;
  my $file = shift;

  my $format = $file =~ /\.(gb|genbank|gbk)$/i ? 'genbank' : 'embl';

  my $seqio = Bio::SeqIO->new( '-format' => $format, -file => $file);
  my $seq   = $seqio->next_seq;

  $self->_load_embl($seq->accession,$seq);
  1;
}
newdescriptionprevnextTop
sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);
  my ($proxy) = rearrange(['PROXY'],@_);
  if ($proxy) {
    my @args = ref($proxy) ? @$proxy : eval $proxy;
    $self->{_proxy} =\@ args if @args;
  }
  $self;
}
refclassdescriptionprevnextTop
sub refclass {
  my $self = shift;
  my $refname = shift;
  'Accession';
}
segmentdescriptionprevnextTop
sub segment {
  my $self = shift;
  my @segments = $self->SUPER::segment(@_);

  if (!@segments) {
    my $refclass = $self->refclass;

    my %args = $self->setup_segment_args(@_);
    if ($args{-class} && $args{-class} =~ /$refclass/oi) {
      return unless $self->load_from_embl('embl'=>$args{-name});
      @segments = $self->SUPER::segment(@_);
    } elsif ($args{-class} && $args{-class} =~ /refseq|swall|embl/i) { #hack to get refseq names
return unless $self->load_from_embl(lc($args{-class})=>$args{-name}); $args{-class} = $self->refclass; @segments = $self->SUPER::segment(%args); } } $self->_multiple_return_args(@segments); } # default is to return 'Sequence' as the class of all references
}
General documentation
AUTHORTop
Lincoln Stein <lstein@cshl.org>.
Copyright 2002 Cold Spring Harbor Laboratory.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.