Bio::EnsEMBL::Analysis::RunnableDB Solexa2Genes
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Solexa2Genes
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::Solexa2Genes
Bio::EnsEMBL::Analysis::RunnableDB
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::ExonUtils
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptUtils
Bio::EnsEMBL::DnaDnaAlignFeature
Bio::EnsEMBL::FeaturePair
Bio::EnsEMBL::SimpleFeature
Bio::EnsEMBL::Transcript
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Synopsis
  my $db      = Bio::EnsEMBL::DBAdaptor->new($locator);
my $refine_genes = Bio::EnsEMBL::Analysis::RunnableDB::Solexa2Genes->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$refine_genes->fetch_input();
$refine_genes->run();
$refine_genes->write_output(); #writes to DB
Description
The databases containing the various features to combine is defined in
Bio::EnsEMBL::Analysis::Config::Databases and the configuration for the
module is defined in Bio::EnsEMBL::Analysis::Config::GeneBuild::Solexa2Genes
Methods
ALIGNMENT_DB
No description
Code
END_EXON_COVERAGE
No description
Code
EXCLUDE_EXONS
No description
Code
MERGE_GENES
No description
Code
MIN_EXONS
No description
Code
MIN_LENGTH
No description
Code
MIN_SPAN
No description
Code
OUTPUT_DB
No description
Code
chr_slice
No description
Code
exon_clusterDescriptionCode
feature_slice_adaptor
No description
Code
fetch_inputDescriptionCode
gene_cluster
No description
Code
make_geneDescriptionCode
merge_genesDescriptionCode
new
No description
Code
pad_exonsDescriptionCode
reads
No description
Code
repeat_feature_adaptor
No description
Code
repeat_slice_adaptor
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
exon_clustercode    nextTop
    Title        :   exon_cluster
Usage : $self->exon_cluster($ugfs)
Returns : Array ref Bio::EnsEMBL::Exon
Args : Array ref Bio::EnsEMBL::Exon
Description : clusters individual reads into blocks representing exons
: uses pair information to link blocks into transcripts
: filters out poorly supported blocks
fetch_inputcodeprevnextTop
    Title        :   fetch_input
Usage : $self->fetch_input
Returns : nothing
Args : none
make_genecodeprevnextTop
    Title        :   make_gene
Usage : $self->make_gene($exons)
Returns : Array ref of Bio::EnsEMBL::Gene objects
Args : Array ref of Bio::EnsEMBL::Exon objects
Description : Builds gene models from an array of exons
merge_genescodeprevnextTop
    Title        :   merge_genes
Usage : $self->make_genes($genes)
Returns : Array ref of Bio::EnsEMBL::Gene objects
Args : Array ref of Bio::EnsEMBL::Gene objects
Description : Merges gene models when they are separated
: by a gap of <= MERGE_GENES and the gap is 100%
: covered by repeats
pad_exonscodeprevnextTop
    Title        :   pad_exons
Usage : $self->($exon_clusters)
Returns : Array ref of Bio::EnsEMBL::Exon
Args : Array ref of Bio::EnsEMBL::Exon
Description : Takes an array of Exons, pads them and builds a
: DnaAlignFeature from it that represents a transcript
Methods code
ALIGNMENT_DBdescriptionprevnextTop
sub ALIGNMENT_DB {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_ALIGNMENT_DB'} = $value;
  }
  
  if (exists($self->{'_CONFIG_ALIGNMENT_DB'})) {
    return $self->{'_CONFIG_ALIGNMENT_DB'};
  } else {
    return undef;
  }
}
END_EXON_COVERAGEdescriptionprevnextTop
sub END_EXON_COVERAGE {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_END_EXON_COVERAGE'} = $value;
  }
  
  if (exists($self->{'_CONFIG_END_EXON_COVERAGE'})) {
    return $self->{'_CONFIG_END_EXON_COVERAGE'};
  } else {
    return 0;
  }
}
EXCLUDE_EXONSdescriptionprevnextTop
sub EXCLUDE_EXONS {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_EXCLUDE_EXONS'} = $value;
  }
  
  if (exists($self->{'_CONFIG_EXCLUDE_EXONS'})) {
    return $self->{'_CONFIG_EXCLUDE_EXONS'};
  } else {
    return 0;
  }
}
MERGE_GENESdescriptionprevnextTop
sub MERGE_GENES {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_MERGE_GENES'} = $value;
  }
  
  if (exists($self->{'_CONFIG_MERGE_GENES'})) {
    return $self->{'_CONFIG_MERGE_GENES'};
  } else {
    return 0;
  }
}



1;
}
MIN_EXONSdescriptionprevnextTop
sub MIN_EXONS {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_MIN_EXONS'} = $value;
  }
  
  if (exists($self->{'_CONFIG_MIN_EXONS'})) {
    return $self->{'_CONFIG_MIN_EXONS'};
  } else {
    return 0;
  }
}
MIN_LENGTHdescriptionprevnextTop
sub MIN_LENGTH {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_MIN_LENGTH'} = $value;
  }
  
  if (exists($self->{'_CONFIG_MIN_LENGTH'})) {
    return $self->{'_CONFIG_MIN_LENGTH'};
  } else {
    return 0;
  }
}
MIN_SPANdescriptionprevnextTop
sub MIN_SPAN {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_MIN_SPAN'} = $value;
  }
  
  if (exists($self->{'_CONFIG_MIN_SPAN'})) {
    return $self->{'_CONFIG_MIN_SPAN'};
  } else {
    return 0;
  }
}
OUTPUT_DBdescriptionprevnextTop
sub OUTPUT_DB {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_OUTPUT_DB'} = $value;
  }
  
  if (exists($self->{'_CONFIG_OUTPUT_DB'})) {
    return $self->{'_CONFIG_OUTPUT_DB'};
  } else {
    return undef;
  }
}
chr_slicedescriptionprevnextTop
sub chr_slice {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_chr_slice} = $val;
  }

  return $self->{_chr_slice};
}
##########################################
# Config variables
}
exon_clusterdescriptionprevnextTop
sub exon_cluster {
  my ($self,$gene_cluster) = @_;
  my @gapped_reads =  @{$gene_cluster->{'features'}};
  my @ugfs;
  # get the ungapped features ( corresponds to individual reads
foreach my $read ( @gapped_reads ) { foreach my $ugf ($read->ungapped_features) { push @ugfs, $ugf; } } my $reads = $self->reads; my @exon_clusters; my $exon_cluster_num = 0; my $clean_exon_clusters; my @final_exon_clusters; my @transcripts; my @ungapped_features = sort { $a->start <=> $b->start } @ugfs; my $cluster_data; my $cluster_hash; my $pairs; # make exon clusters and store the names of the reads and associated cluster number
foreach my $ugf ( @ungapped_features ) { my $clustered = 0; foreach (my $i = 0 ; $i < scalar(@exon_clusters) ; $i++ ) { my $exon_cluster = $exon_clusters[$i]; # do they overlap?
if ( $ugf->start <= $exon_cluster->end+1 && $ugf->end >= $exon_cluster->start-1 ) { # Expand the exon_cluster
$exon_cluster->start($ugf->start) if $ugf->start < $exon_cluster->start; $exon_cluster->end($ugf->end) if $ugf->end > $exon_cluster->end; $exon_cluster->score($exon_cluster->score + 1); $clustered = 1; $cluster_data->{$ugf->hseqname}->{$i} = 1; } } # start a new cluster if there is no overlap
unless ( $clustered ) { $cluster_data->{$ugf->hseqname}->{$exon_cluster_num} = 1 ; $ugf->score(1); push @exon_clusters, $ugf; $exon_cluster_num++; } } if ( scalar(@exon_clusters) == 1 ) { # just keep single exon clusters for now - might be useful later
my $exon = $exon_clusters[0]; push @transcripts, [$exon]; return\@ transcripts; } print STDERR scalar(@exon_clusters) . " Exons\n"; # make the exon pairings store them in cluster hash
foreach my $read ( keys %{$cluster_data} ) { my @clusters = keys %{$cluster_data->{$read}}; for (my $i = 0; $i < @clusters ; $i ++ ) { my $cluster = $clusters[$i]; # adjust for compressed reads
my $read_depth = 1; $read_depth = $reads->{$read}->p_value if $reads->{$read}->p_value; $cluster_hash->{$clusters[0]}->{$cluster} += $read_depth unless $clusters[0] == $cluster; } } # join the exon_clusters so that terminal exons get penalised
# but put in all the other exons
my $clean_cluster_hash; my $average_read_depth = 0; foreach my $key ( sort keys %{$cluster_hash} ) { # which exon_clusters is it connected to ?
# is it an end exon? ie only connects to exon_clusters on one side?
foreach my $key2 ( keys %{$cluster_hash->{$key}} ){ # how many reads join these clusters to the right and the left?
# initialise read count first - stop error messages
my $read_count = $cluster_hash->{$key}->{$key2} ; $clean_cluster_hash->{$key}->{'right'}+= $read_count ; $clean_cluster_hash->{$key2}->{'left'}+= $read_count ; $average_read_depth += $read_count; print "READ DEPTH $key $key2 $read_count\n"; # dont count single coverage pairs when
# working out averages
$pairs++ if $read_count >1; } } # whats the average coverage of the exons?
$average_read_depth /= $pairs if $pairs;
print STDERR "Average READ DEPTH $average_read_depth\n "; # Filter out pairings with very litte support
foreach my $key ( sort keys %{$cluster_hash} ) { foreach my $key2 ( keys %{$cluster_hash->{$key}} ){ if ($cluster_hash->{$key}->{$key2} < ($average_read_depth/100)*$self->EXCLUDE_EXONS ) {
print "Removing pair
$key $key2 with a read_depth of ".$cluster_hash->{$key}->{$key2} ."\n";
$cluster_hash->{$key}->{$key2} = 0; } } } # now lets cycle through our exon_clusters removing end exons
# with little support all other exons go forward to the next stage
# add in single exon genes might be useful later
foreach my $key ( sort keys %{$clean_cluster_hash} ) { # keep them if they are supported on both sides
if ( $clean_cluster_hash->{$key}->{'left'} && $clean_cluster_hash->{$key}->{'right'} ) { $clean_exon_clusters->{$key} = 1; next; } # is it a well supported left end exon?
if ( $clean_cluster_hash->{$key}->{'left'} && $clean_cluster_hash->{$key}->{'left'} >= $self->END_EXON_COVERAGE) { $clean_exon_clusters->{$key} = 1 ; next; } # is it a well supported right end exon?
if ( $clean_cluster_hash->{$key}->{'right'} && $clean_cluster_hash->{$key}->{'right'} >= $self->END_EXON_COVERAGE ) { $clean_exon_clusters->{$key} = 1 ; next; } # othwise ignore it
} # now need to find little clusters sitting in introns that are not connected to the transcript
# do a reclustering based on which exons are connected to each other
my @clean_clusters = keys %$clean_exon_clusters; return unless ( scalar(@clean_clusters) > 0) ; # put one exon into the first cluster
my @temp; push @temp, pop(@clean_clusters); push @final_exon_clusters,\@ temp; my $trans_count = 0; LOOP: while ( scalar(@clean_clusters) > 0 ) { my $clustered; my $final_exon_cluster = $final_exon_clusters[$trans_count]; foreach my $cluster_num ( @{$final_exon_cluster} ) { $clustered = 0; # do ANY of our exons join to this exon?
for ( my $i =0 ; $i <= $#clean_clusters; $i++ ) { my $index = $clean_clusters[$i]; # is the current exon attached to any exon in our cluster?
if ( $cluster_hash->{$index}->{$cluster_num} or $cluster_hash->{$cluster_num}->{$index}) { push @{$final_exon_cluster}, $index; # chop it out
splice(@clean_clusters,$i,1); $i--; $clustered = 1; } } } unless ($clustered) { next unless scalar(@clean_clusters) > 0; my @temp; push @temp, pop(@clean_clusters); # start another cluster
push @final_exon_clusters,\@ temp; $trans_count++; } } # So far we have just dealt with array indecies
# now store the actual features
foreach my $exon_cluster ( @final_exon_clusters ) { my @transcript; foreach my $exon ( @$exon_cluster ) { push @transcript, $exon_clusters[$exon]; } @transcript = sort { $a->start <=> $b->start} @transcript; push @transcripts,\@ transcript; } return\@ transcripts;
}
feature_slice_adaptordescriptionprevnextTop
sub feature_slice_adaptor {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_fsa} = $val;
  }

  return $self->{_fsa};
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my ($self) = @_;

  # store some adaptors
$self->repeat_feature_adaptor($self->db->get_RepeatFeatureAdaptor); $self->feature_slice_adaptor($self->get_dbadaptor($self->ALIGNMENT_DB)->get_SliceAdaptor); $self->repeat_slice_adaptor($self->db->get_SliceAdaptor); my $id = $self->input_id; my $slice = $self->fetch_sequence($id); my $feature_slice = $self->feature_slice_adaptor->fetch_by_region ( 'toplevel', $slice->seq_region_name, $slice->start, $slice->end, ); my $chr_slice = $self->feature_slice_adaptor->fetch_by_region('toplevel', $slice->seq_region_name, ); $self->chr_slice($chr_slice); my @features = @{$feature_slice->get_all_DnaAlignFeatures}; my %reads; while ( scalar(@features) > 0 ) { my $read = pop(@features); $reads{$read->hseqname} = $read->transfer($chr_slice); } # store the reads internally
$self->reads(\%reads);
}
gene_clusterdescriptionprevnextTop
sub gene_cluster {
  my ($self) = @_;
  my $reads = $self->reads;
  my @clusters;
  my @features = sort { $a->start <=> $b->start }  values %$reads ;
  foreach my $f ( @features ) {
    my $clustered = 0;
    foreach my $cluster ( @clusters ) {
      # do they overlap?
if ( $f->start <= $cluster->{'end'} && $f->end >= $cluster->{'start'} ) { # Expand the cluster
$cluster->{'start'} = $f->start if $f->start < $cluster->{'start'}; $cluster->{'end'} = $f->end if $f->end > $cluster->{'end'}; push @{$cluster->{'features'}}, $f; $clustered = 1; } } unless ($clustered) { my $cluster; push @{$cluster->{'features'}}, $f; $cluster->{'start'} = $f->start; $cluster->{'end'} = $f->end; push @clusters, $cluster; } } return\@ clusters;
}
make_genedescriptionprevnextTop
sub make_gene {
  my ($self,$exon_ref) = @_;
  # we are making reverse strand genes so the exons need to be in the reverse order
my @exons = sort { $b->start <=> $a->start } @$exon_ref; my $tran = new Bio::EnsEMBL::Transcript(-EXONS =>\@ exons); $tran->analysis($self->analysis); return @{convert_to_genes(($tran),$self->analysis)};
}
merge_genesdescriptionprevnextTop
sub merge_genes {
   my ($self, $genes_ref) = @_;
  my @merged_genes;
  # join together neighbouring genes separated by a gap of X
# where the gap is 100% filled by a repeat
# get rid of nested genes first
my @genes = sort {$a->start <=> $b->start} @$genes_ref; print " Got ". scalar(@genes) ."\n"; for ( my $i = 1 ; $i <= $#genes ; $i++ ) { if ( $genes[$i-1]->start < $genes[$i]->start && $genes[$i-1]->end > $genes[$i]->end ) { push @merged_genes, splice (@genes,$i,1); $i--; } } # is the gap between the genes short enough?
GENE: for ( my $i = 1 ; $i <= $#genes ; $i++ ) { my $left_gene = $genes[$i-1]; my $right_gene = $genes[$i]; my $gap = $right_gene->start - $left_gene->end; if ($gap <= $self->MERGE_GENES && $left_gene->end < $right_gene->start) { # is it covered by repeats?
my $repeat_slice = $self->repeat_slice_adaptor->fetch_by_region ('toplevel', $left_gene->slice->seq_region_name, $left_gene->end, $right_gene->start, ); #print "looking at " . $repeat_slice->name ."\n";
# is the gap covered by a repeat?
my @repeats = sort { $a->start <=> $b->start } @{$self->repeat_feature_adaptor->fetch_all_by_Slice($repeat_slice)} ; # merge repeat blocks
#print scalar(@repeats) . " repeats found \n";
for ( my $j = 1 ; $j <= $#repeats ; $j ++ ) { if ( $repeats[$j]->start <= $repeats[$j-1]->end+1 ){ $repeats[$j-1]->end($repeats[$j]->end) if $repeats[$j]->end > $repeats[$j-1]->end ; splice(@repeats,$j,1); $j--; } } my $repeat_coverage = 0; # so the repeats are now non-overlapping blocks ( if there is more than one )
foreach my $repeat ( @repeats ) { $repeat->start(0) if $repeat->start < 0; $repeat->end($repeat_slice->length) if $repeat->end > $repeat_slice->length; $repeat_coverage+= $repeat->end - $repeat->start; } $repeat_coverage /= $repeat_slice->length;
# merge the genes together where repeat coverage is 100%
if ($repeat_coverage >= 0.95 ) { print "Do some merging\n"; print "Repeat Coverage = $repeat_coverage\n "; print $repeat_slice->name ."\n"; # to do the merge do we just link the genes or do we join them with a long exon?
# how about we keep it as 2 exons but bring them together so they abut each other
# if there is evidene of splicing they will get separated in the refine genes code
# otherwise they will stay merged, given that we are looking at areas covered by repeat
# it seems likely that the exons should read right through.
# so we want to extend the last exon of the left gene to finish at the end
# of the first exon of the right gene, we also want to lose the first right
# gene exon
my @merged_exons; my @left_exons = sort { $a->start <=> $b->start } @{$left_gene->get_all_Exons}; my @right_exons = sort { $a->start <=> $b->start } @{$right_gene->get_all_Exons}; my $merged_exon = pop(@left_exons); my $disgarded_exon = shift(@right_exons); $merged_exon->end($disgarded_exon->end); push @merged_exons,@left_exons; push @merged_exons,$merged_exon; push @merged_exons,@right_exons; my @new_genes = $self->make_gene(\@merged_exons); my $new_gene = $new_genes[0]; print "NEW GENE " . $new_gene->start ." ". $new_gene->end ."\n"; # take them out of the array and carry on
splice (@genes,$i-1,2,$new_gene); $i-= 1; print "Merged 1 successfully\n"; } } } # add whatever is left to the return array
push @merged_genes, @genes; print "Returning " . scalar (@merged_genes) . "\n"; return\@ merged_genes; } ###########################################
# Containers
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  $self->read_and_check_config($SOLEXA2GENES_CONFIG_BY_LOGIC);
  return $self;
}
pad_exonsdescriptionprevnextTop
sub pad_exons {
  my ($self,$exon_cluster_ref) = @_;

  my @padded_exons;
  my @exon_clusters = sort { $a->start <=> $b->start } @$exon_cluster_ref;
  # make a padded exon array
foreach my $exon ( @exon_clusters ){ my $padded_exon = create_Exon ( $exon->start - 20, $exon->end + 20 , -1, -1, -1, $exon->analysis, undef, undef, $self->chr_slice, ); # dont let it fall of the slice because of padding
$padded_exon->start(1) if $padded_exon->start <= 0; $padded_exon->end($self->chr_slice->length - 1) if $padded_exon->end >= $self->chr_slice->length; my $feat = new Bio::EnsEMBL::DnaDnaAlignFeature (-slice => $exon->slice, -start => $padded_exon->start, -end => $padded_exon->end, -strand => -1, -hseqname => $exon->display_id, -hstart => 1, -hstrand => 1, -hend => $padded_exon->length, -analysis => $exon->analysis, -score => $exon->score, -cigar_string => $padded_exon->length.'M'); my @feats; push @feats,$feat; $padded_exon->add_supporting_features(@feats); push @padded_exons, $padded_exon; } # dont let adjacent exons overlap
for ( my $i = 1 ; $i <= $#padded_exons; $i++ ) { my $exon = $padded_exons[$i]; my $last_exon = $padded_exons[$i-1]; if ( $last_exon->end >= $exon->start ) { # trim the exons so they dont overlap
my $trim = int(($exon->start - $last_exon->end) /2);
$last_exon->end( $last_exon->end + ($trim) -1 ); $exon->start( $exon->start - ($trim)+1 ); } } return\@ padded_exons
}
readsdescriptionprevnextTop
sub reads {
  my ($self, $value) = @_;
  if ($value ) {
     $self->{'_reads'} = $value;
  }
  return $self->{'_reads'};
}
repeat_feature_adaptordescriptionprevnextTop
sub repeat_feature_adaptor {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_rfa} = $val;
  }

  return $self->{_rfa};
}
repeat_slice_adaptordescriptionprevnextTop
sub repeat_slice_adaptor {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_rsa} = $val;
  }

  return $self->{_rsa};
}
rundescriptionprevnextTop
sub run {
   my ($self) =@_;
  my %reads = %{$self->reads};
  my @genes;
  print STDERR "Found " . scalar(keys %reads) . " reads in total\nRunning initial clustering...";
  # Do an intial clustering to group together the read pairs
# should roughly correspond to genes
my $gene_clusters = $self->gene_cluster; print STDERR "Found " . scalar(@$gene_clusters) . " clusters\n"; # take one cluster of reads at a time
foreach my $gene_cluster ( @$gene_clusters ) { # break the gene clusters down into exon clusters
# linked by the pair information
my ($exon_clusters) = $self->exon_cluster($gene_cluster); next unless ( $exon_clusters ); # Now we have collapsed our reads we need to make sure we keep the connections between
# them so we can make our fake transcripts
print STDERR "Found " . scalar(@$exon_clusters) . " transcripts\n "; foreach my $exon_cluster ( @$exon_clusters ) { #print STDERR scalar(@$exon_cluster) ." exon clusters\n";
next unless scalar(@$exon_cluster) > 0; # make the dna align feature
my $padded_exons = $self->pad_exons($exon_cluster); push @genes , $self->make_gene($padded_exons) if $padded_exons ; } } # merge genes separated by long repeats
if ( $self->MERGE_GENES ) { my $merged_genes = $self->merge_genes(\@genes); $self->output($merged_genes); } else { $self->output(\@genes); }
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self) = @_;
  my @genes = @{$self->output};

  my $outdb = $self->get_dbadaptor($self->OUTPUT_DB);
  my $gene_adaptor = $outdb->get_GeneAdaptor;
 
 my $fails = 0;
  my $total = 0;
  
  foreach my $gene ( @genes ) {
    $gene->analysis($self->analysis);
    $gene->source($self->analysis->logic_name);
    $gene->biotype('rough');
    my $tran = $gene->get_all_Transcripts->[0];
    print "FILTERING " . $tran->start ." " , $tran->end ." ";
    # Filter models before writing them
if ( scalar(@{$tran->get_all_Exons}) < $self->MIN_EXONS ) { print "Rejecting because of exon count " . scalar(@{$tran->get_all_Exons}) ."\n"; next; } if( ( $tran->end - $tran->start ) / $tran->length <= $self->MIN_SPAN ) {
print "Rejecting because of span " . (
$tran->end - $tran->start ) / $tran->length ."\n"; next; } if ( $tran->length > $self->MIN_LENGTH ){ print "Rejecting because of length " . $tran->length ."\n"; } eval { $gene_adaptor->store($gene); }; if ($@){ warning("Unable to store gene!!\n$@"); $fails++; } $total++; } if ($fails > 0) { throw("Not all genes could be written successfully " . "($fails fails out of $total)"); } print STDERR "$total genes written after filtering\n";
}
General documentation
simple_clusterTop
    Title        :   simple_cluster
Usage : $self->($reads)
Returns : Hash ref of clustered reads
Args : None
Description : Does a simple clustering of read pairs to identify
: groups of pairs that correspond to genes