BioPerl documentation.

  my $db="EMBL";
  my $file="../data/M20132";
  my $id="HSANDREC";

  my $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"$db", -file=>"$file");
                        or
  my $loader=Bio::LiveSeq::IO::BioPerl->load(-db=>"$db", -id=>"$id");

  my @translationobjects=$loader->entry2liveseq();

  my $genename="AR";
  my $gene=$loader->gene2liveseq(-gene_name => "$genename",
                                    -getswissprotinfo => 0);

  NOTE1: EMBL is currently the only supported -db value, so -db defaults to EMBL.
  NOTE2: -file requires a filename (and path if necessary) containing an
               EMBL entry
         -id will use Bio::DB::EMBL.pm to fetch the sequence from the web,
               (bioperl wraparound to [w]getz from SRS)
  NOTE3: To retrieve the swissprot (if possible) attached to the embl entry
               (to get protein domains at dna level), only Bio::DB::EMBL.pm
               is supported under BioPerl. Refer to Bio::LiveSeq::IO::SRS
               otherwise.
  NOTE4: The swissprot retrieval described in NOTE3 is not implemented yet for
               BioPerl; work is in progress.

# embl2hash
#
# Flattens a BioPerl sequence object into a plain hash of entry-level values
# plus an array of feature hashes.
#
# Positional arguments:
#   1. the sequence object (must respond to seq, display_id, accession,
#      get_secondary_accessions, molecule, division, desc, species, length
#      and top_SeqFeatures)
#   2. optional arrayref of feature names (primary tags) to keep; features
#      whose name is not listed are skipped, so when this argument is
#      omitted no feature is kept
#   3. optional arrayref of qualifier (tag) names to keep; same filtering
#      rule as for features
#
# Returns a hashref with the keys ID, AccNumber, Molecule, Division,
# Description, Organism, Sequence, SeqLength, FeaturesNumber, Features and
# CDS.  Returns nothing (false) after a warning when no sequence object is
# given, so that callers testing the return value can report the failure
# instead of dying on a method call on an undefined value.
sub embl2hash {

  my ($seqobj, $wanted_features, $wanted_qualifiers) = @_;

  unless (ref($seqobj)) {
    carp("embl2hash: no sequence object given");
    return;
  }

  # Lookup tables used to skip unwanted features / qualifiers.
  my %valid_features;
  my %valid_names;
  if ($wanted_features) {
    %valid_features = map { $_ => 1 } @{$wanted_features};
  }
  if ($wanted_qualifiers) {
    %valid_names = map { $_ => 1 } @{$wanted_qualifiers};
  }

  my $entry_Sequence = lc($seqobj->seq()); # SRS returns lowercase

  my $entry_ID = $seqobj->display_id;

  # Primary accession followed by the space-separated secondary ones.
  my $entry_AccNumber = $seqobj->accession; # or maybe accession_number ?
  for my $secondary_acc ($seqobj->get_secondary_accessions) {
    $entry_AccNumber .= " $secondary_acc";
  }

  my $entry_Molecule    = $seqobj->molecule;
  my $entry_Division    = $seqobj->division;
  my $entry_Description = $seqobj->desc;

  # An entry may carry no species information; leave Organism undefined
  # in that case instead of calling a method on an undefined value.
  my $speciesobj = $seqobj->species;
  my $entry_Organism;
  if ($speciesobj) {
    $entry_Organism = $speciesobj->binomial;
  }

  my $entry_SeqLength = $seqobj->length;

  my %entryhash;
  $entryhash{ID}          = $entry_ID;
  $entryhash{AccNumber}   = $entry_AccNumber;
  $entryhash{Molecule}    = $entry_Molecule;
  $entryhash{Division}    = $entry_Division;
  $entryhash{Description} = $entry_Description;
  $entryhash{Organism}    = $entry_Organism;
  $entryhash{Sequence}    = $entry_Sequence;
  $entryhash{SeqLength}   = $entry_SeqLength;

  my @topfeatures = $seqobj->top_SeqFeatures();
  # FeaturesNumber counts every top-level feature, including skipped ones.
  $entryhash{FeaturesNumber} = scalar(@topfeatures);

  # Builds [first, last, strand] for anything answering start/end/strand.
  # On the minus strand (-1) the coordinates are swapped to (end, start).
  my $oriented_range = sub {
    my ($span) = @_;
    my $strand = $span->strand;
    if (defined($strand) && $strand == -1) {
      return [ $span->end, $span->start, $strand ];
    }
    return [ $span->start, $span->end, $strand ];
  };

  my @features;
  my $position = 0; # index among the features that are kept
  foreach my $feat (@topfeatures) {
    my $feature_name = $feat->primary_tag;
    next unless $valid_features{$feature_name};

    my %feature;
    my $featlocation = $feat->location;
    if ($feature_name eq "CDS") {
      # A CDS range is always a list of exon ranges (a "transcript"),
      # even when the CDS consists of a single exon.
      my @transcript;
      if ($featlocation->isa('Bio::Location::SplitLocationI')) {
        @transcript = map { $oriented_range->($_) } $featlocation->sub_Location();
      } else {
        @transcript = ($oriented_range->($feat));
      }
      $feature{range} = \@transcript;
    } else {
      # Every other feature carries a single flat range.
      $feature{range} = $oriented_range->($feat);
    }
    $feature{location} = "deprecated";
    $feature{position} = $position;
    $feature{name}     = $feature_name;

    # qual_number counts every tag of the feature, including skipped ones.
    my @feature_qual_names = $feat->all_tags();
    $feature{qual_number} = scalar(@feature_qual_names);

    my %feature_qualifiers;
    foreach my $qual_name (@feature_qual_names) {
      next unless $valid_names{$qual_name};
      # Only the first value of a multi-valued qualifier is recorded.
      my ($first_value) = $feat->each_tag_value($qual_name);
      $feature_qualifiers{$qual_name} = $first_value;
    }
    $feature{qualifiers} = \%feature_qualifiers;

    push(@features, \%feature);
    $position++;
  }
  $entryhash{Features} = \@features;

  # Separate list holding only the CDS features (same hashrefs as above).
  my @cds = grep { $_->{'name'} eq "CDS" } @features;
  $entryhash{CDS} = \@cds;

  return (\%entryhash);

}

# load
#
# Constructor: reads one EMBL entry, converts it with embl2hash and returns
# a loader object blessed into the invoking class.
#
# Named arguments:
#   -db    database name; only "EMBL" is accepted, and it is the default
#   -file  path of a file holding the entry (read through Bio::SeqIO)
#   -id    identifier of the entry (fetched through Bio::DB::EMBL)
# Exactly one of -file and -id must be given.
#
# Returns the blessed hashref (keys: db, filename, id, hash) on success.
# On any failure a warning is issued with carp and 0 is returned.
sub load {

  my ($thing, %args) = @_;
  my $class = ref($thing) || $thing;

  my ($db, $filename, $id) = ($args{-db}, $args{-file}, $args{-id});

  if (defined($db)) {
    unless ($db eq "EMBL") {
      carp("Note: only EMBL now supported!");
      return(0);
    }
  } else {
    $db = "EMBL";
  }

  # The option is spelled -file, so the messages name it that way.
  if (defined($id) && defined($filename)) {
    carp("You can either specify a -id or a -file!");
    return(0);
  }

  unless (defined($id) || defined($filename)) {
    carp("You must specify either a -id or a -file!");
    return(0);
  }

  my $hashref;
  if ($db eq "EMBL") {
    my $test_transl = 0; # change to 0 to avoid comparison of translation

    # these can be changed for future needs
    my @embl_valid_feature_names = qw(CDS CDS_span exon prim_transcript intron repeat_unit repeat_region mRNA);
    my @embl_valid_qual_names = qw(gene codon_start db_xref product note number rpt_family transl_table);

    # dunno yet how to implement test_transl again....
    # probably on a one-on-one basis with each translation?
    if ($test_transl) {
      push(@embl_valid_qual_names, "translation"); # needed for test_transl
    }

    my $seqobj; # bioperl sequence object, to be passed to embl2hash

    if (defined($filename)) {
      my $stream = Bio::SeqIO->new('-file' => $filename, '-format' => 'EMBL');
      $seqobj = $stream->next_seq();
    } else { # i.e. if -id
      my $embl = Bio::DB::EMBL->new();
      $seqobj = $embl->get_Seq_by_id($id); # EMBL ID or ACC
    }

    # Neither source is guaranteed to hand back an object; stop here
    # rather than call methods on an undefined value further down.
    unless (defined($seqobj)) {
      carp("No sequence entry could be retrieved!");
      return(0);
    }

    $hashref = embl2hash($seqobj, \@embl_valid_feature_names, \@embl_valid_qual_names);
  }
  unless ($hashref) { return (0); }

  my %loader = (db => $db, filename => $filename, id => $id, hash => $hashref);
  return bless(\%loader, $class);

}

# novelaasequence2gene
#
# Hands an amino acid sequence, together with a codon usage table, to
# Bio::LiveSeq::IO::Loader::_common_novelaasequence2gene and returns
# whatever that function returns.
#
# Named arguments:
#   -aasequence         amino acid sequence (required)
#   -gene_name          name for the gene; defaults to "Novel Unknown"
#   -cusg_data          whitespace-separated codon usage counts; when
#                       omitted the built-in 64-entry table is used
#   -translation_table  translation table id; defaults to 1
#
# Returns 0, after a carp warning, when -aasequence is missing.
sub novelaasequence2gene {

  my ($self, %args) = @_;
  my $gene_name  = $args{-gene_name};
  my $cusg_data  = $args{-cusg_data};
  my $aasequence = $args{-aasequence};
  my $ttabid     = $args{-translation_table};

  unless ($aasequence) {
    carp("aasequence not given");
    return (0);
  }
  $gene_name = "Novel Unknown" unless $gene_name;
  $ttabid    = 1               unless $ttabid;

  my @species_codon_usage;
  if ($cusg_data) {
    # split ' ' (awk mode) ignores leading whitespace and treats any run
    # of whitespace, newlines included, as one separator, so irregular
    # spacing cannot inject empty entries and shift the table.
    @species_codon_usage = split(' ', $cusg_data);
  } else {
    @species_codon_usage =
        qw(68664 118404 126679 51100 125600 123646 75667 210903 435317
        139009 79303 135218 128429 192616 49456 161556 211962 131222
        162837 213626 69346 140780 182506 219428 76684 189374 173010
        310626 82647 202329 180955 250410 180001 118798 76398 160764
        317359 119013 262630 359627 218376 186915 130857 377006 162826
        113684 317703 441298 287040 245435 174805 133427 134523 108740
        225633 185619 78463 240138 174021 244236 142435 8187 5913
        14381); # updated 21Jul2000
  }

  my $gene = Bio::LiveSeq::IO::Loader::_common_novelaasequence2gene(\@species_codon_usage, $ttabid, $aasequence, $gene_name);
  return ($gene);
}

1;

}

embl2hash	Description	Code
load	Description	Code
novelaasequence2gene	Description	Code