XrefParser UniGeneParser
Included libraries | Package variables | General documentation | Methods
Toolbar
WebCvs | Raw content
Package variables
Privates (from "my" definitions)
%geneid_2_desc;
$verbose;
Included modules
File::Basename
Inherit
XrefParser::BaseParser
Synopsis
No synopsis!
Description
No description!
Methods
create_xrefs
No description
Code
get_desc
No description
Code
run
No description
Code
Methods description
None available.
Methods code
create_xrefs | description | prev | next | Top
# Build one xref hash per record of the UniGene sequence file.
#
# Arguments (positional, after the invocant):
#   $peptide_source_id - accepted for interface compatibility; not used here
#   $unigene_source_id - stored as SOURCE_ID on every xref
#   $uniq_file         - sequence file; records are separated by "\n>"
#   $data_file         - description file, handed to get_desc()
#   $species_id        - stored as SPECIES_ID on every xref
#
# Returns a reference to an array of xref hashes, or undef when get_desc()
# fails or the sequence file cannot be opened.  Records whose header carries
# no "/ug=" accession are skipped with a warning.
sub create_xrefs {
  my $self = shift;

  my ( $peptide_source_id, $unigene_source_id, $uniq_file, $data_file,
      $species_id )
    = @_;

  # get_desc() fills %geneid_2_desc, which is read below to set DESCRIPTION.
  if ( !defined( $self->get_desc($data_file) ) ) {
    return undef;
  }

  my $unigene_io = $self->get_filehandle($uniq_file);
  if ( !defined $unigene_io ) {
    print STDERR "Can't open UniGene file $uniq_file\n";
    return undef;
  }

  # Example record (header line followed by sequence lines):
  #>gnl|UG|Hs#S19185843 Homo sapiens N-acetyltransferase 2 (arylamine N-acetyltransferase)
  # , mRNA (cDNA clone MGC:71963 IMAGE:4722596), complete cds /cds=(105,977) /gb=BC067218 /gi=45501306 /ug=Hs.2 /len=1344
  #GGGGACTTCCCTTGCAGACTTTGGAAGGGAGAGCACTTTATTACAGACCTTGGAAGCAAG

  my @xrefs;

  # Read one whole record per getline() call.
  local $/ = "\n>";

  # A lexical is used instead of $_ so the caller's $_ is left untouched.
  while ( defined( my $record = $unigene_io->getline() ) ) {
    my $entry = $record;
    chomp $entry;    # strips the trailing "\n>" separator

    my ( $header, $sequence ) = split( /\n/, $entry, 2 );
    $header   = '' if !defined $header;
    $sequence = '' if !defined $sequence;    # record with a header line only

    $sequence =~ s/^>//;
    $sequence =~ tr/\n//d;    # join the sequence lines into one string

    # The cluster accession is the value of the "/ug=" tag in the header.
    my ($acc_no_ver) = $header =~ /\/ug=(\S+)/;
    if ( !defined $acc_no_ver ) {
      warn "No UniGene accession (/ug=) in header '$header', skipping\n";
      next;
    }

    if ( !defined( $geneid_2_desc{$acc_no_ver} ) ) {
      print "****$record\n";
      $geneid_2_desc{$acc_no_ver} = "";
      warn "No desc for $acc_no_ver\n";
    }

    # No species check, as the files contain data for only one species.
    my $xref = {
      SEQUENCE_TYPE => 'dna',
      STATUS        => 'experimental',
      SOURCE_ID     => $unigene_source_id,
      ACCESSION     => $acc_no_ver,
      LABEL         => $acc_no_ver,
      DESCRIPTION   => $geneid_2_desc{$acc_no_ver},
      SEQUENCE      => $sequence,
      SPECIES_ID    => $species_id,
      INFO_TYPE     => "SEQUENCE_MATCH",
    };

    push @xrefs, $xref;
  }

  $unigene_io->close();

  # Empty the shared description table now that the xrefs are built.
  %geneid_2_desc = ();

  print "Read " . scalar(@xrefs) . " xrefs from $uniq_file\n" if ($verbose);

  return \@xrefs;
}

1;
get_desc | description | prev | next | Top
# Load cluster descriptions from the UniGene data file into %geneid_2_desc.
#
# The file is read in records terminated by "//".  For every record that has
# both an ID line and a TITLE line, the title is stored under the ID.
#
# Arguments:
#   $data_file - name of the description file, opened via get_filehandle()
#
# Returns 1 on success, undef if the file cannot be opened.
sub get_desc {
  my $self      = shift;
  my $data_file = shift;

  # Read one "//"-terminated record per getline() call.
  local $/ = "//";

  my $desc_io = $self->get_filehandle($data_file);

  if ( !defined $desc_io ) {
    print STDERR "ERROR: Can't open $data_file\n";
    return undef;
  }

  # A lexical is used instead of $_ so the caller's $_ is left untouched.
  while ( defined( my $record = $desc_io->getline() ) ) {
    # Example lines:
    #ID          Hs.159356
    #TITLE       Hypothetical LOC388277
    #
    # Both tags are anchored to the start of a line so that a field name
    # which merely ends in "ID" or "TITLE" is not mistaken for them.
    my ($id)      = $record =~ /^[ \t]*ID\s+(\S+)/m;
    my ($descrip) = $record =~ /^[ \t]*TITLE\s+(.+)\n/m;

    if ( defined $id && defined $descrip ) {
      $geneid_2_desc{$id} = $descrip;
    }
  }

  $desc_io->close();

  return 1;
}
run | description | prev | next | Top
# Parser entry point: build the UniGene xrefs, upload them and, when a
# release file is given, record the release string for the UniGene source.
#
# Arguments (positional, after the invocant):
#   $source_id    - accepted but not used; the source id is looked up by the
#                   name 'UniGene' instead
#   $species_id   - species id; derived from the sequence file name when undef
#   $files        - array ref: [0] sequence file, [1] description file
#   $release_file - optional file searched for the release line
#   $verbose      - stored in the file-scoped $verbose flag
#
# Returns 0 on success and 1 on error.
sub run {
  # Always take the invocant.  The previous "my $self = shift if (...)" form
  # is a conditional "my", whose behaviour perlsyn documents as undefined,
  # and it mis-unpacked the arguments when called from top-level code.
  my $self = shift;

  my $source_id    = shift;
  my $species_id   = shift;
  my $files        = shift;
  my $release_file = shift;
  $verbose         = shift;

  my $uniq_file = $files->[0];
  my $data_file = $files->[1];

  my $unigene_source_id = $self->get_source_id_for_source_name('UniGene');

  print "UniGene source ID = $unigene_source_id.\n" if ($verbose);

  if ( !defined($species_id) ) {
    $species_id = $self->get_species_id_for_filename($uniq_file);
  }

  my $xrefs =
    $self->create_xrefs( $unigene_source_id, $unigene_source_id,
      $uniq_file, $data_file, $species_id );

  if ( !defined($xrefs) ) {
    return 1;    # error
  }

  if ( !defined( $self->upload_xref_object_graphs($xrefs) ) ) {
    return 1;    # error
  }

  if ( defined $release_file ) {
    # Get species name from species ID.
    my $species_name;
    my $sth =
      $self->dbi()
      ->prepare("SELECT name FROM species WHERE species_id = ?");
    $sth->execute($species_id);
    $sth->bind_columns( \$species_name );
    $sth->fetchrow_array();
    $sth->finish();

    if ( !defined $species_name ) {
      # Without a name the pattern below would match every line and a
      # meaningless release string would be stored.
      warn "No species name found for species ID $species_id, "
        . "not setting UniGene release\n";
      return 0;
    }

    $species_name =~ tr/_/ /;

    # Parse and set release info.
    my $release_io = $self->get_filehandle($release_file);
    if ( !defined $release_io ) {
      print STDERR "Can't open release file $release_file\n";
      return 1;    # error
    }

    # The last line containing the species name wins.  The name is quoted
    # so that any regex metacharacters in it are matched literally.
    my $release;
    while ( defined( my $line = $release_io->getline() ) ) {
      if ( $line =~ /^(.*\Q$species_name\E)/i ) {
        $release = $1;
      }
    }
    $release_io->close();

    if ( defined $release ) {
      $release =~ s/\s{2,}/ /g;
      $release =~ s/^(.*) UniGene/$1, UniGene/;

      print "UniGene release: '$release'\n" if ($verbose);
      $self->set_release( $unigene_source_id, $release );
    }
  }

  return 0;    # successful
}

# File-scoped table of UniGene ID => description, filled by get_desc() and
# read (then emptied) by create_xrefs().
my %geneid_2_desc;
General documentation
No general documentation available.