SRS documentation.

sub embl2hash {

  my ($i,$j);
  my $query=$_[0];
  my %valid_features; my %valid_names;
  if ($_[1]) {
    %valid_features = map {$_, 1} @{$_[1]}; # to skip features
  }
  if ($_[2]) {
    %valid_names = map {$_, 1} @{$_[2]}; # to skip qualifiers
  }
  # SRS API used to fetch all relevant fields
  my $sess = new Session;
  my $set = $sess->query("[$query]", "");
  my $numEntries=$set->size();
  if ($numEntries < 1) {
    carp "No sequence entry found for the query $query";
    return (0);
  } elsif ($numEntries > 1) {
    carp "Too many entries found for the input given";
    return (0);
  } else {
    my $entry = $set->getEntry(0);
    my ($entry_ID,$entry_AccNumber,$entry_Molecule,$entry_Description,$entry_Organism,$entry_SeqLength,$entry_Sequence,$entry_Division);
    # Fetch what we can fetch without the loader
    $entry_ID = $entry->fieldToString("id","","","");
    $entry_AccNumber = $entry->fieldToString("acc","","","");
    $entry_Molecule = $entry->fieldToString("mol","","","");
    $entry_Division = $entry->fieldToString("div","","","");
    $entry_Description = $entry->fieldToString("des","","","");
    $entry_Description =~ s/\n/ /g;
    $entry_Organism = $entry->fieldToString("org","","","");
    $entry_SeqLength = $entry->fieldToString("sl","","","");
    # Now use the loader
    my $loadedentry = $entry->load("EMBL");
    # Fetch the rest via the loader
    $entry_Sequence = $loadedentry->attrStr("sequence");
    $entry_Sequence =~ s/\n//g; # from plain format to raw string

    # put into the hash
    my %entryhash;
    $entryhash{ID}=$entry_ID;
    $entryhash{AccNumber}=$entry_AccNumber;
    $entryhash{Molecule}=$entry_Molecule;
    $entryhash{Division}=$entry_Division;
    $entryhash{Description}=$entry_Description;
    $entryhash{Organism}=$entry_Organism;
    $entryhash{Sequence}=$entry_Sequence;
    $entryhash{SeqLength}=$entry_SeqLength;

    # create features array
    my $features = $loadedentry->attrObjList("features");
    my $featuresnumber= $features->size();
    $entryhash{FeaturesNumber}=$featuresnumber;
    my ($feature,$feature_name,$feature_location);
    my ($feature_qual_names,$feature_qual_values);
    my ($feature_qual_name,$feature_qual_value);
    my ($feature_qual_number,$feature_qual_number2);
    my @features;
    for $i (0..$featuresnumber-1) {
      my %feature;
      $feature = $features->get($i);
      $feature_name = $feature->attrStr("FtKey");

      #unless ($feature_name eq "CDS") {
      unless ($valid_features{$feature_name}) {
	#print "not valid feature: $feature_name\n";
	next;
      }
      #print "now processing feature: $feature_name\n";
      $feature_location = $feature->attrStr("FtLocation");
      $feature_location =~ s/\n//g;
      $feature_qual_names= $feature->attrStrList("FtQualNames");
      $feature_qual_values= $feature->attrStrList("FtQualValues");
      $feature_qual_number= $feature_qual_names->size();
      $feature_qual_number2= $feature_qual_values->size(); # paranoia check
      if ($feature_qual_number > $feature_qual_number2) {
	carp ("Warning with Feature at position $i ($feature_name): There are MORE qualifier names than qualifier values.");
	# this can happen, e.g. "/partial", let's move on, just issue a warning
	#return (0);
      } elsif ($feature_qual_number < $feature_qual_number2) {
	carp ("Error with Feature at position $i ($feature_name): There are LESS qualifier names than qualifier values. Stopped");
	return (0);
      }
      #} else {print "NUMBER OF QUALIFIERS: $feature_qual_number \n";} # DEBUG

      # Put things inside hash
      $feature{position}=$i;
      $feature{name}=$feature_name;

      # new range field in hash
      my @range;
      if ($feature_name eq "CDS") {
	@range=cdslocation2transcript($feature_location);
	$feature{locationtype}="joined";
      } else {
	if (index($feature_location,"join") == -1) {
	  @range=location2range($feature_location);
	} else { # complex location in feature different than CDS
	  @range=joinedlocation2range($feature_location);
	  $feature{locationtype}="joined";
	}
      }
      $feature{range}=\@range;
      $feature{location}="deprecated";
      my %feature_qualifiers;
      for $j (0..$feature_qual_number-1) {
	$feature_qual_name=$feature_qual_names->get($j);
	$feature_qual_name =~ s/^\///; # eliminates heading "/"

	# skip all not interesting (for now) qualifiers
        unless ($valid_names{$feature_qual_name}) {
	  #print "not valid name: $feature_qual_name\n";
	  next;
	}
	#print "now processing: $feature_qual_name\n";
	$feature_qual_value=$feature_qual_values->get($j);
	$feature_qual_value =~ s/^"//; # eliminates heading "
	$feature_qual_value =~ s/"$//; # eliminates trailing "
	$feature_qual_value =~ s/\n//g; # eliminates all new lines
	$feature_qualifiers{$feature_qual_name}=$feature_qual_value;
      }
      $feature{qualifiers}=\%feature_qualifiers;
      push (@features,\%feature); # array of features
    }
    $entryhash{Features}=\@features; # put this also into the hash
    my @cds; # array just of CDSs
    for $i (0..$#features) {
      if ($features[$i]->{'name'} eq "CDS") {
	push(@cds,$features[$i]);
      }
    }
    $entryhash{CDS}=\@cds; # put this also into the hash
    return (\%entryhash);
  }
}

# argument: location field of an EMBL feature
# returns: array with correct $start $end and $strand to create LiveSeq obj

}

sub load {

  my ($thing, %args) = @_;
  my $class = ref($thing) || $thing;
  my ($obj,%loader);

  my ($db,$query)=($args{-db},$args{-query});

  if (defined($db)) {
    unless ($db eq "EMBL") {
      carp "Note: only EMBL now supported!";
      return(0);
    }
  } else {
    $db="EMBL";
  }

  my $hashref;
  if ($db eq "EMBL") {
    my $test_transl=0; # change to 0 to avoid comparison of translation

    # these can be changed for future needs
    my @embl_valid_feature_names=qw(CDS exon prim_transcript intron repeat_unit repeat_region mRNA);
    my @embl_valid_qual_names=qw(gene codon_start db_xref product note number rpt_family transl_table);

    # dunno yet how to implement test_transl again....
    # probably on a one-on-one basis with each translation?
    if ($test_transl) {
      push (@embl_valid_qual_names,"translation"); # needed for test_transl
    }

    # not to have the whole program die because of SRS death
    eval {
      $hashref=&embl2hash("$query",\@embl_valid_feature_names,\@embl_valid_qual_names);
    };
    my $errormsg;
    if ($@) {
      foreach $errormsg (split(/\n/,$@)) {
	if (index($errormsg,"in cleanup") == -1) {
	  carp "SRS EMBL Loader: $@";
	}
      }
    }
  }
  unless ($hashref) { return (0); }

  %loader = (db => $db, query => $query, hash => $hashref);
  $obj =\% loader;
  $obj = bless $obj, $class;
  return $obj;

}

sub novelaasequence2gene {

  my ($self, %args) = @_;
  my ($gene_name,$species_name,$aasequence)=($args{-gene_name},$args{-genome},$args{-aasequence});
  unless ($aasequence) {
    carp "aasequence not given";
    return (0);
  }
  unless ($gene_name) {
    $gene_name="Novel Unknown";
  }
  unless ($species_name) {
    $species_name="Homo sapiens";
  }
  
  my $sess = new Session;
  my ($e,$numEntries,$set);

  # codonusage lookup
  my $codonusage_usage;
  my @species_codon_usage;
  $set = $sess->query("[codonusage:'$species_name']", "");
  $numEntries = $set->size();
  if ($numEntries > 0) {
    $e = $set->getEntry(0);
    $species_name = $e->fieldToString("id","","","");
    $codonusage_usage = $e->fieldToString("usage","","","");
    @species_codon_usage=split(/\s/,$codonusage_usage); # spaces or tabs
    if ($numEntries > 1) {
      carp "Search in Codon Usage DB resulted in $numEntries results. Taking the first one: $species_name";
    }
  } else {
      carp "Genome not found in codon usage DB.";
      return (0);
  }  

  # taxonomy lookup
  my $mito_flag = 0;
  my $species_origin;
  if ($species_name =~ /^Mitochondrion /) {
    $mito_flag = 1;
    $species_origin = $species_name;
    $species_origin =~ s/^Mitochondrion //;
    $set = $sess->query("[taxonomy-species:'$species_origin']", "");
  } elsif ($species_name =~ /^Chloroplast |^Kinetoplast |^Chromoplast /) {
    $species_origin = $species_name;
    $species_origin =~ s/^Chromoplast //;
    $species_origin =~ s/^Kinetoplast //;
    $species_origin =~ s/^Chloroplast //;
    $set = $sess->query("[taxonomy-species:'$species_origin']", "");
  } else {
    $set = $sess->query("[taxonomy-species:'$species_name']", "");
  }
  $numEntries = $set->size();
  my ($taxonomy_id,$taxonomy_gc,$taxonomy_mgc,$taxonomy_name);
  if ($numEntries > 0) {
    $e = $set->getEntry(0);
    $taxonomy_id = $e->fieldToString("id","","","");
    $taxonomy_name = $e->fieldToString("species","","","");
    $taxonomy_gc = $e->fieldToString("gc","","","");
    $taxonomy_mgc = $e->fieldToString("mgc","","","");
    if ($numEntries > 1) {
      carp "Note! More than one entry found in taxonomy DB for the genome query given. Using the first one: $taxonomy_name ($taxonomy_id)";
    }
  } else {
    carp "Genome not found in taxonomy DB.";
    return (0);
  }
  # Lookup appropriate translation table
  my $ttabid;
  if ($mito_flag) {
    $ttabid = $taxonomy_mgc;
  } else {
    $ttabid = $taxonomy_gc;
  }

  my $gene=Bio::LiveSeq::IO::Loader::_common_novelaasequence2gene(\@species_codon_usage,$ttabid,$aasequence,$gene_name);
  return ($gene);
}

1;

}

sub swissprot2hash {

  my ($i,$j);
  my $query=$_[0];
  # SRS API used to fetch all relevant fields
  my $sess = new Session;
  my $set = $sess->query("[$query]", "");
  my $numEntries = $set->size();
  if ($numEntries < 1) {
    carp "No sequence entry found for the query $query";
    return (0);
  } elsif ($numEntries > 1) {
    carp "Too many entries found for the input given";
    return (0);
  } else {
    my $entry = $set->getEntry(0);
    my ($entry_ID,$entry_AccNumber,$entry_Molecule,$entry_Description,$entry_Organism,$entry_SeqLength,$entry_Sequence,$entry_Gene);
    # Fetch what we can fetch without the loader
    $entry_ID = $entry->fieldToString("id","","","");
    $entry_AccNumber = $entry->fieldToString("acc","","","");
    $entry_Gene = $entry->fieldToString("gen","","","");
    $entry_Gene =~ s/\n/ /g;
    $entry_Description = $entry->fieldToString("des","","","");
    $entry_Description =~ s/\n/ /g;
    $entry_Organism = $entry->fieldToString("org","","","");
    chop $entry_Organism;
    $entry_SeqLength = $entry->fieldToString("sl","","","");
    # Now use the loader
    my $loadedentry = $entry->load("Swissprot");
    # Fetch the rest via the loader
    $entry_Sequence = $loadedentry->attrStr("Sequence");
    $entry_Sequence =~ s/\n//g; # from plain format to raw string

    # put into the hash
    my %entryhash;
    $entryhash{ID}=$entry_ID;
    $entryhash{AccNumber}=$entry_AccNumber;
    $entryhash{Molecule}=$entry_Molecule;
    $entryhash{Gene}=$entry_Gene;
    $entryhash{Description}=$entry_Description;
    $entryhash{Organism}=$entry_Organism;
    $entryhash{Sequence}=$entry_Sequence;
    $entryhash{SeqLength}=$entry_SeqLength;

    # create features array
    my $features = $loadedentry->attrObjList("Features");
    my $featuresnumber= $features->size();
    $entryhash{FeaturesNumber}=$featuresnumber;
    my ($feature,$feature_name,$feature_description,$feature_location);
    my @features;
    for $i (0..$featuresnumber-1) {
      my %feature;
      $feature = $features->get($i);
      $feature_name = $feature->attrStr("FtKey");
      $feature_location = $feature->attrStr("FtLocation");
      $feature_location =~ s/ +/ /g;
      $feature_description = $feature->attrStr("FtDescription");
      chop $feature_description;
      $feature_description =~ s/\nFT                                / /g;

      # Put things inside hash
      $feature{position}=$i;
      $feature{name}=$feature_name;
      $feature{location}=$feature_location;
      $feature{description}=$feature_description;

      push (@features,\%feature); # array of features
    }
    $entryhash{Features}=\@features; # put this also into the hash
    return (\%entryhash);
  }

}

cdslocation2transcript	No description	Code
embl2hash	Description	Code
get_swisshash	Description	Code
joinedlocation2range	No description	Code
load	Description	Code
location2range	No description	Code
novelaasequence2gene	Description	Code
swissprot2hash	Description	Code