ensembl-pipeline FeatureComparison
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Root
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning verbose )
Inherit
Bio::EnsEMBL::Root
Synopsis
No synopsis!
Description
No description!
Methods
compareDescriptionCode
db_type_check
No description
Code
fast_sortDescriptionCode
feature_infoDescriptionCode
newDescriptionCode
pipeline_compare
No description
Code
queryDescriptionCode
run_query
No description
Code
tables_to_compare
No description
Code
target
No description
Code
verbosityDescriptionCode
Methods description
comparecode    nextTop
  Arg [1]   : FeatureComparison
Function : Compares the to array of featues noting which features are
not the same. If the verbose flag is on it will also print out the
coordinates of the features which are the same
Returntype: none
Exceptions: none
Example :
fast_sortcodeprevnextTop
  Arg [1]   : FeatureComparison
Arg [2] : arrayref array of features
Function : sorts the features based on their start coord very quickly
Returntype: arrayref
Exceptions: none
Example :
feature_infocodeprevnextTop
  Arg [1]   : FeatureComparison
Arg [2] : arrayref of features
Function : prints the start, end and strand of each feature passed in
Returntype: none
Exceptions: none
Example :
newcodeprevnextTop
  Arg [1]   : FeatureComparison
Arg [2] : arrayref, array of features
Arg [3] : arrayref, array of features
Arg [4] : int, binary switch for verbosity
Function : create a FeatureComparison module
Returntype: FeatureComparison
Exceptions:
Example : my $feature_comp = FeatureComparison->
(
-QUERY => $query,
-TARGET => $target,
-VERBOSE => $verbose,
);
query/targetcodeprevnextTop
  Arg [1]   : FeatureComparison
Arg [2] : arrayref, array of features
Function : container for the arrayref of features
Returntype: arrayref
Exceptions: throws if not passed an arrayref
Example :
verbositycodeprevnextTop
  Arg [1]   : FeatureComparison
Arg [2] : int
Function : toggle as to be verbose or not
Returntype: int
Exceptions:
Example :
Methods code
comparedescriptionprevnextTop
sub compare {
  my ($self) = @_;
  my @query = @{$self->query};
  my @target = @{$self->target};
  my @q_not_matched;
  @query = @{$self->fast_sort(\@query)};
  @target = @{$self->fast_sort(\@target)};
  print "\nThere are ".@query." query features\n";
  print "There are ".@target." target features\n\n";
  my $num_t_feats = scalar(@target);
  my $start_ind = 0;
 QUERY:foreach my $feat(@query){
    if($start_ind > $num_t_feats){   #as the query features and target feature have been
push(@q_not_matched, $feat); #sorted, if there are more query features than target
next QUERY; #features, the query will surely fail to find a match.
} TARGET:for (my $i = $start_ind; $i< $num_t_feats; $i++){ my $t_feat = $target[$i]; next TARGET if(!$t_feat); if($t_feat->start == $feat->start && $t_feat->end == $feat->end && $t_feat->strand == $feat->strand){ if($self->verbosity){ print "Got match\n"; print "t_feat = " . $t_feat->start . " " . $t_feat->end . " ". $t_feat->strand."\n"; print "feat = " . $feat->start . " " . $feat->end . " ". $feat->strand."\n"; } $target[$i] = undef; #those targets which have been matched with query were "wiped out"
next QUERY; }elsif($t_feat->start > $feat->start){ push(@q_not_matched, $feat); next QUERY; }elsif($t_feat->end < $feat->start){ $start_ind++; } } } if (scalar(@q_not_matched)>0) { print "Unmatched query features\n"; $self->feature_info(\@q_not_matched); } else { print "No unmatched query features were found.\n"; print "If the analysis has run successfully, this indicates that ". "all query features had matching target features.\n"; } my @unmatched_targets; for (my $i = 0; $i < scalar(@target); $i++) { if (defined $target[$i]) { push (@unmatched_targets, $target[$i]); } } if (scalar(@unmatched_targets) > 0) { print "\nUnmatched target features\n"; $self->feature_info(\@target); } else { print "All target features had matching query features.\n"; }
}
db_type_checkdescriptionprevnextTop
sub db_type_check {
  my ($self, $db) = @_;
  if(!$db->isa('Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor')){
    throw("Can't run the RuleManager with $db you need a ".
          "Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor");
  }
  return;
}
fast_sortdescriptionprevnextTop
sub fast_sort {
  my ($self, $array) = @_;
  my @array = map { $_->[1] } sort { $a->[0] <=> $b->[0] } 
    map { [$_->start, $_] } @$array;
  return\@ array;
}
feature_infodescriptionprevnextTop
sub feature_info {
  my ($self, $array) = @_;
  my $name;
  foreach my $f(@$array){
    if(!$f){
      next;
    }
    $name = $f->analysis->logic_name if(!$name);
    print $name." ".$f->slice->seq_region_name." ".$f->start." ".$f->end.
      " ".$f->strand."\n";
  }
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  &verbose('WARNING');
  my ($query, $target, $verbose) = rearrange( ['QUERY', 'TARGET',
                                               'VERBOSE'], @args);
  $self->query($query);
  $self->target($target);
  $self->verbosity($verbose);
  return $self;
}
pipeline_comparedescriptionprevnextTop
sub pipeline_compare {
  my($self, $table_names, $query_db, $ref_db) = @_;
  if(!$table_names){
    $table_names = $self->tables_to_compare;
  }
  $self->db_type_check($query_db);
  $self->db_type_check($ref_db);
  if(@$table_names == 0){
    print "FeatureComparison:pipeline_compare --- you passed in no tables ".
      "so we can't make any comparisons\n";
    return;
  }
  my $query_numbers = {};
  my $ref_numbers = {};
  foreach my $table(@$table_names){
    my $sql = "select logic_name, count(*) from analysis, ";
    $sql .= $table." where analysis.analysis_id=".$table.".analysis_id ".
      "group by logic_name\n";
    $query_numbers = $self->run_query($sql, $query_db, $query_numbers);
    $ref_numbers = $self->run_query($sql, $ref_db, $ref_numbers);
  }
  print "\nThere are ".keys(%$query_numbers)." analysis in the query database ".
    $query_db->dbname." and ".keys(%$ref_numbers)." analysis in the reference ".
      "database ".$ref_db->dbname.", all from ".@$table_names." tables\n\n";
  printf("%-15s %-15s %-15s\n", 'logic_name', 'query_count', 'ref_count');
  printf("%-15s %-15s %-15s\n", '----------', '-----------', '---------');
  foreach my $logic_name(keys(%$query_numbers)){
    printf("%-15s %-15s %-15s\n", $logic_name, $query_numbers->{$logic_name},
           $ref_numbers->{$logic_name});
  }
  print "\n";
}
querydescriptionprevnextTop
sub query {
  my ($self, $query) = @_;
  if($query){
    throw("query must be an array ref not a ".$query) 
      unless(ref($query) eq 'ARRAY'); 
    $self->{'query'} = $query;
  }
  return $self->{'query'};
}
run_querydescriptionprevnextTop
sub run_query {
  my ($self, $sql, $db, $hash) = @_;
  my $sth = $db->prepare($sql);
  $sth->execute;
  while(my($logic_name, $count) = $sth->fetchrow){
    if(!$hash->{$logic_name}){
      $hash->{$logic_name} = $count;
    }else{
      warning($logic_name." already appears in ".$hash." with count ".
            $hash->{$logic_name}." from database ".$db->dbname);
    }
  }
  return $hash;
}
tables_to_comparedescriptionprevnextTop
sub tables_to_compare {
  my ($self, $target) = @_;
  if($target){
    throw("tables to compare must be an array ref not a ".$target) 
      unless(ref($target) eq 'ARRAY');
    $self->{'tables'} = $target;
  }
  return $self->{'tables'};
}
targetdescriptionprevnextTop
sub target {
  my ($self, $target) = @_;
  if($target){
    throw("target must be an array ref not a ".$target) 
      unless(ref($target) eq 'ARRAY');
    $self->{'target'} = $target;
  }
  return $self->{'target'};
}
verbositydescriptionprevnextTop
sub verbosity {
  my $self = shift;
  $self->{'verbose'} = shift if(@_);
  return $self->{'verbose'};
}
General documentation
No general documentation available.