Bio::EnsEMBL::Compara::Production DnaFragChunk
Package variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::DBSQL::SequenceAdaptor
Bio::EnsEMBL::Compara::DnaFrag
Bio::EnsEMBL::Utils::Exception
Time::HiRes qw ( time gettimeofday tv_interval )
Synopsis
Description
Methods
adaptor
No description
Code
bioseqDescriptionCode
cache_sequence
No description
Code
dbID
No description
Code
display_idDescriptionCode
dnafrag
No description
Code
dnafrag_id
No description
Code
dump_chunks_to_fasta_file
No description
Code
dump_to_fasta_file
No description
Code
fetch_masked_sequenceDescriptionCode
length
No description
Code
masking_analysis_data_id
No description
Code
masking_options
No description
Code
new
No description
Code
seq_end
No description
Code
seq_start
No description
Code
sequence
No description
Code
sequence_id
No description
Code
sliceDescriptionCode
Methods description
bioseqcode    nextTop
  Description: returns sequence of this chunk as a Bio::Seq object
will fetch from core if not previously cached in compara
Args : none
Example : my $bioseq = $chunk->bioseq;
Returntype : Bio::Seq object
Exceptions : none
Caller : general
display_idcodeprevnextTop
  Args       : none
Example : my $id = $chunk->display_id;
Description: returns string describing this chunk which can be used
as display_id of a Bio::Seq object or in a fasta file.
Uses dnafrag information in addition to start and end.
Returntype : string
Exceptions : none
Caller : general
fetch_masked_sequencecodeprevnextTop
  Description: Meta method which uses the slice associated with this chunk
and from the external core database associated with the slice
it extracts the masked DNA sequence.
Returns as Bio::Seq object. does not cache sequence internally
Arg [1] : (int or string) masked status of the sequence [optional]
0 or '' = unmasked (default)
1 or 'hard' = masked
2 or 'soft' = soft-masked
Arg[2] : (ref to hash) hash of masking options [optional]
Example : $bioseq = $chunk->get_sequence(1);
Returntype : Bio::Seq or undef if a problem
Exceptions : none
Caller : general
slicecodeprevnextTop
  Arg        : none
Example : $slice = $chunk->slice;
Description: Meta method which uses the dnafrag of this chunk to get the genomeDB
to connect to corresponding core database, and then to uses the core
SliceAdaptor to get a slice associated with the dnafrag type and name and
the this chunks start,end.
Returntype : Bio::EnsEMBL::Slice object
Exceptions : none
Caller : general, self->fetch_masked_sequence()
Methods code
adaptordescriptionprevnextTop
sub adaptor {
  my $self = shift;
  $self->{'_adaptor'} = shift if(@_);
  return $self->{'_adaptor'};
}
bioseqdescriptionprevnextTop
sub bioseq {
  my $self = shift;

  my $seq = undef;

  if(not defined($self->sequence())) {
    my $starttime = time();

    $seq = $self->fetch_masked_sequence;
    my $fetch_time = time()-$starttime;

    $self->sequence($seq);
  }
  
  $seq = Bio::Seq->new(-seq        => $self->sequence(),
                       -display_id => $self->display_id(),
                       -primary_id => $self->sequence_id(),
                       );
  return $seq;
}



##########################
#
# getter/setter methods of data which is stored in database
#
##########################
}
cache_sequencedescriptionprevnextTop
sub cache_sequence {
  my $self = shift;
  
  # $self->sequence will load from compara if available, if not go fetch it
unless($self->sequence) { $self->fetch_masked_sequence; } # fetching sequence will set $self->sequence but not sequence_id so store it
# fetching may have occurred in a previous method call, so keep this logic
# separate to make sure it will get stored
if($self->sequence_id==0) { $self->adaptor->update_sequence($self); } return $self; } 1;
}
dbIDdescriptionprevnextTop
sub dbID {
  my $self = shift;
  $self->{'_dbID'} = shift if(@_);
  return $self->{'_dbID'};
}
display_iddescriptionprevnextTop
sub display_id {
  my $self = shift;

  my $id = "";

  if($self->dbID) {
    $id .= 'chunkID'.$self->dbID.":";
  } elsif($self->dnafrag) {
    $id .= $self->dnafrag->display_id.":";
  }
  $id .= $self->seq_start.".".$self->seq_end;
  
  return $id;
}
dnafragdescriptionprevnextTop
sub dnafrag {
  my ($self,$dnafrag) = @_;

  if (defined($dnafrag)) {
    throw("arg must be a [Bio::EnsEMBL::Compara::DnaFrag] not a [$dnafrag]")
        unless($dnafrag->isa('Bio::EnsEMBL::Compara::DnaFrag'));
    $self->{'_dnafrag'} = $dnafrag;
    $self->dnafrag_id($dnafrag->dbID);
  }

  #lazy load the DnaFrag
if(!defined($self->{'_dnafrag'}) and defined($self->dnafrag_id) and $self->adaptor) { $self->{'_dnafrag'} = $self->adaptor->_fetch_DnaFrag_by_dbID($self->dnafrag_id); } return $self->{'_dnafrag'};
}
dnafrag_iddescriptionprevnextTop
sub dnafrag_id {
  my $self = shift;
  return $self->{'dnafrag_id'} = shift if(@_);
  return $self->{'dnafrag_id'};
}
dump_chunks_to_fasta_filedescriptionprevnextTop
sub dump_chunks_to_fasta_file {
  my $self = shift;
  my $fastafile = shift;

  #choosen because it is divisible by 60 (fasta line width used)
my $chunk_size = 99999960; my $start = 1; my $end = $chunk_size; my $total_length = $self->length; #check to see if the fastafile already exists and delete it if it does.
if (-e $fastafile) { unlink $fastafile; } open(OUTSEQ, ">>$fastafile") or $self->throw("Error opening $fastafile for write"); print OUTSEQ ">" . $self->display_id . "\n"; while ($start <= $total_length) { $self->{'_slice'} = undef; $self->{'_sequence'} = undef; $self->seq_start($start); $self->seq_end($end); my $bioseq = $self->bioseq; #increment start and end to do next chunk
$start = $end + 1; if ($end+$chunk_size < $total_length) { $end += $chunk_size; } else { $end = $total_length; } #write out sequence to fasta file
my $seq = $bioseq->seq; $seq =~ s/(.{60})/$1\n/g; $seq =~ s/\n$//; print OUTSEQ $seq, "\n"; } close OUTSEQ; return $self;
}
dump_to_fasta_filedescriptionprevnextTop
sub dump_to_fasta_file {
  my $self = shift;
  my $fastafile = shift;
  
  my $bioseq = $self->bioseq;

  #printf("  writing chunk %s\n", $self->display_id);
open(OUTSEQ, ">$fastafile") or $self->throw("Error opening $fastafile for write"); my $output_seq = Bio::SeqIO->new( -fh =>\*OUTSEQ, -format => 'Fasta'); $output_seq->write_seq($bioseq); close OUTSEQ; return $self; } #Version of dump_to_fasta_file that appends chunk_size portions of the
#sequence to fastafile. Useful if the sequence is very long eg opossum chr1 & 2
#Must remember that fastafile should not exist beforehand otherwise it will be
#appended to so check if it exists and delete it
}
fetch_masked_sequencedescriptionprevnextTop
sub fetch_masked_sequence {
  my $self = shift;
  
  return undef unless(my $slice = $self->slice());


  my $dcs = $slice->adaptor->db->dbc->disconnect_when_inactive();
  #print("fetch_masked_sequence disconnect=$dcs\n");
$slice->adaptor->db->dbc->disconnect_when_inactive(0); #printf("fetch_masked_sequence disconnect=%d\n", $slice->adaptor->db->dbc->disconnect_when_inactive());
my $seq; my $id = $self->display_id; my $masking_options; my $starttime = time(); if(defined($self->masking_options)) { $masking_options = eval($self->masking_options); my $logic_names = $masking_options->{'logic_names'}; if(defined($masking_options->{'default_soft_masking'}) and $masking_options->{'default_soft_masking'} == 0) { #print "getting HARD masked sequence...\n";
$seq = $slice->get_repeatmasked_seq($logic_names,0,$masking_options); } else { #print "getting SOFT masked sequence...\n";
$seq = $slice->get_repeatmasked_seq($logic_names,1,$masking_options); } } else { # no masking options set, so get unmasked sequence
#print "getting UNMASKED sequence...\n";
$seq = Bio::PrimarySeq->new( -id => $id, -seq => $slice->seq); } unless($seq->isa('Bio::PrimarySeq')) { #print("seq is a [$seq] not a [Bio::PrimarySeq]\n");
my $oldseq = $seq; $seq = Bio::PrimarySeq->new( -id => $id, -seq => $oldseq->seq); } #print ((time()-$starttime), " secs\n");
$slice->adaptor->db->dbc->disconnect_when_inactive($dcs); #printf("fetch_masked_sequence disconnect=%d\n", $slice->adaptor->db->dbc->disconnect_when_inactive());
#print STDERR "sequence length : ",$seq->length,"\n";
$seq = $seq->seq; if (defined $masking_options) { foreach my $ae (@{$slice->get_all_AssemblyExceptionFeatures}) { next unless (defined $masking_options->{"assembly_exception_type_" . $ae->type}); my $length = $ae->end - $ae->start + 1; if ($masking_options->{"assembly_exception_type_" . $ae->type} == 0) { my $padstr = 'N' x $length; substr ($seq, $ae->start, $length) = $padstr; } elsif ($masking_options->{"assembly_exception_type_" . $ae->type} == 1) { my $padstr = lc substr ($seq, $ae->start, $length); substr ($seq, $ae->start, $length) = $padstr; } } } $self->sequence($seq); return $seq;
}
lengthdescriptionprevnextTop
sub length {
  my $self = shift;
  return $self->{'seq_end'} - $self->{'seq_start'} + 1;
}
masking_analysis_data_iddescriptionprevnextTop
sub masking_analysis_data_id {
  my $self = shift;
  $self->{'masking_analysis_data_id'} = shift if(@_);
  $self->{'masking_analysis_data_id'}=0 unless(defined($self->{'masking_analysis_data_id'}));
  return $self->{'masking_analysis_data_id'};
}
masking_optionsdescriptionprevnextTop
sub masking_options {
  my $self = shift;
  if(@_) {
    $self->{'_masking_options'} = shift;
    $self->masking_analysis_data_id(0);
  }
  return $self->{'_masking_options'};
}

#method for passing previously known and stored analysis_data_id reference around
#so that there is no need to store it again
}
newdescriptionprevnextTop
sub new {
  my ($class, $dnafrag, $start, $end) = @_;
  my $self = {};
  bless $self,$class;
  $self->dnafrag($dnafrag) if($dnafrag);
  $self->seq_start($start) if($start);
  $self->seq_end($end)     if($end);
  return $self;
}
seq_enddescriptionprevnextTop
sub seq_end {
  my $self = shift;
  my $end  = shift;
  if($end) {
    $end=$self->dnafrag->length if($self->dnafrag and ($end > $self->dnafrag->length));
    $self->{'seq_end'} = $end;
  }
  $self->{'seq_end'}=0 unless(defined($self->{'seq_end'}));
  return $self->{'seq_end'};
}
seq_startdescriptionprevnextTop
sub seq_start {
  my $self = shift;
  return $self->{'seq_start'} = shift if(@_);
  $self->{'seq_start'}=0 unless(defined($self->{'seq_start'}));
  return $self->{'seq_start'};
}
sequencedescriptionprevnextTop
sub sequence {
  my $self = shift;
  if(@_) {
    $self->{'_sequence'} = shift;
    $self->sequence_id(0);
  }

  return $self->{'_sequence'} if(defined($self->{'_sequence'}));

  #lazy load the sequence if sequence_id is set
if(defined($self->sequence_id()) and defined($self->adaptor())) { $self->{'_sequence'} = $self->adaptor->db->get_SequenceAdaptor->fetch_by_dbID($self->sequence_id); } return $self->{'_sequence'};
}
sequence_iddescriptionprevnextTop
sub sequence_id {
  my $self = shift;
  return $self->{'sequence_id'} = shift if(@_);
  return $self->{'sequence_id'};
}
slicedescriptionprevnextTop
sub slice {
  my ($self) = @_;

  return $self->{'_slice'} if(defined($self->{'_slice'}));

  return undef unless($self->dnafrag);
  return undef unless($self->dnafrag->genome_db);
  return undef unless(my $dba = $self->dnafrag->genome_db->db_adaptor);

  my $sliceDBA = $dba->get_SliceAdaptor;
  #if ($self->seq_end > $self->seq_start) {
#Should be >= to since end can equal start and the slice be one base long
#If seq_start and seq_end are both 0, set the slice to be whole dnafrag
if ($self->seq_end >= $self->seq_start && $self->seq_end != 0) { $self->{'_slice'} = $sliceDBA->fetch_by_region($self->dnafrag->coord_system_name, $self->dnafrag->name, $self->seq_start, $self->seq_end); } else { $self->{'_slice'} = $sliceDBA->fetch_by_region($self->dnafrag->coord_system_name, $self->dnafrag->name); } return $self->{'_slice'};
}
General documentation
NAME - Bio::EnsEMBL::Compara::Production::DnaFragChunkTop
CONTACTTop
Jessica Severin <jessica@ebi.ac.uk>
APPENDIXTop
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _