Bio::EnsEMBL::Analysis::RunnableDB Pmatch
SummaryIncluded librariesPackage variablesDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  
Bio::EnsEMBL::Analysis::RunnableDB::Pmatch - aligns protein sequence to genomic sequence
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::Pmatch qw ( PMATCH_BY_LOGIC )
Bio::EnsEMBL::Analysis::Runnable::Pmatch
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Unavailable
Synopsis
No synopsis!
Description
Pmatch is a fast alignment program written by Richard Durbin. We use it
to align species-specific proteins to the genome. (We also use it to
align protein sets from very closely related species, e.g. Mouse to Rat or
Fugu to Tetraodon.)
The pmatch source code is available from the
Sanger CVS repository in module rd-utils
(http://cvs.sanger.ac.uk/cgi-bin/viewvc.cgi/rd-utils/?root=ensembl). The code to run this process in the Ensembl code
base can be found in two RunnableDBs and a config
file: Bio::EnsEMBL::Analysis::RunnableDB::Pmatch,
Bio::EnsEMBL::Analysis::RunnableDB::BestPmatch and
Bio::EnsEMBL::Analysis::Config::GeneBuild::Pmatch.
Methods
BINARY_LOCATION
No description
Code
MAX_INTRON_LENGTH
No description
Code
MIN_COVERAGE
No description
Code
OUTPUT_DB
No description
Code
PROTEIN_FILE
No description
Code
REPEAT_MASKING
No description
Code
fetch_input
No description
Code
get_adaptor
No description
Code
new
No description
Code
read_and_check_config
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
BINARY_LOCATIONdescriptionprevnextTop
# Get/set accessor for the program location that fetch_input falls back
# to when the analysis has no program_file.
#
# Arg [1]   : (optional) new value; it is stored only when true, so
#             undef, 0 and '' leave the current value untouched
# Returntype: whatever is currently held in $self->{'BINARY_LOCATION'}
sub BINARY_LOCATION {
  my $self = shift;
  my $arg  = shift;
  $self->{'BINARY_LOCATION'} = $arg if $arg;
  return $self->{'BINARY_LOCATION'};
}
MAX_INTRON_LENGTHdescriptionprevnextTop
# Get/set accessor for the value fetch_input passes to the runnable as
# -max_intron_length.
#
# Note that the value is kept under the hash key 'MAX_INTRON_SIZE', not
# 'MAX_INTRON_LENGTH'.
#
# Arg [1]   : (optional) new value; it is stored only when true, so
#             undef, 0 and '' leave the current value untouched
# Returntype: whatever is currently held in $self->{'MAX_INTRON_SIZE'}
sub MAX_INTRON_LENGTH {
  my $self = shift;
  my $arg  = shift;
  $self->{'MAX_INTRON_SIZE'} = $arg if $arg;
  return $self->{'MAX_INTRON_SIZE'};
}
MIN_COVERAGEdescriptionprevnextTop
# Get/set accessor for the value fetch_input passes to the runnable as
# -min_coverage.
#
# Arg [1]   : (optional) new value; it is stored only when true, so
#             undef, 0 and '' leave the current value untouched
# Returntype: whatever is currently held in $self->{'MIN_COVERAGE'}
sub MIN_COVERAGE {
  my $self = shift;
  my $arg  = shift;
  $self->{'MIN_COVERAGE'} = $arg if $arg;
  return $self->{'MIN_COVERAGE'};
}
OUTPUT_DBdescriptionprevnextTop
# Get/set accessor for the value get_adaptor hands to get_dbadaptor to
# obtain the output database.
#
# Arg [1]   : (optional) new value; it is stored only when true, so
#             undef, 0 and '' leave the current value untouched
# Returntype: whatever is currently held in $self->{'OUTPUT_DB'}
sub OUTPUT_DB {
  my $self = shift;
  my $arg  = shift;
  $self->{'OUTPUT_DB'} = $arg if $arg;
  return $self->{'OUTPUT_DB'};
}
PROTEIN_FILEdescriptionprevnextTop
# Get/set accessor for the value fetch_input passes to the runnable as
# -protein_file.
#
# Arg [1]   : (optional) new value; it is stored only when true, so
#             undef, 0 and '' leave the current value untouched
# Returntype: whatever is currently held in $self->{'PROTEIN_FILE'}
sub PROTEIN_FILE {
  my $self = shift;
  my $arg  = shift;
  $self->{'PROTEIN_FILE'} = $arg if $arg;
  return $self->{'PROTEIN_FILE'};
}
REPEAT_MASKINGdescriptionprevnextTop
# Get/set accessor for the repeat-masking logic names that fetch_input
# hands to fetch_sequence.
#
# Arg [1]   : (optional) array ref of logic names; false values (undef,
#             0, '') are ignored and leave the current value untouched
# Returntype: the stored array ref, or undef if none has been set
# Exceptions: throws if a true argument is not an array ref; the stored
#             value is left unchanged in that case
sub REPEAT_MASKING {
  my ($self, $arg) = @_;
  if($arg){
    # Name the offending setting and show the rejected value once. The
    # previous message put the value where the setting name belongs and
    # had a stray "." in front of the value.
    throw("RunnableDB::Pmatch REPEAT_MASKING must be an array ref of ".
          "logic names not ".$arg)
      unless(ref($arg) eq 'ARRAY');
    $self->{'REPEAT_MASKING'} = $arg;
  }
  return $self->{'REPEAT_MASKING'};
}
fetch_inputdescriptionprevnextTop
# Prepares the run: fetches the sequence for the input id, stores it as
# the query, and registers a Runnable::Pmatch built from the configured
# settings.
#
# Returntype: whatever $self->runnable returns when given the new runnable
sub fetch_input {
  my $self = shift;

  # Fetch the region named by the input id, passing the configured
  # REPEAT_MASKING setting to fetch_sequence.
  my $region = $self->fetch_sequence($self->input_id, $self->db,
                                     $self->REPEAT_MASKING);
  $self->query($region);

  # NOTE(review): the parsed parameters are read here but never handed
  # to the runnable below - confirm whether that is intentional.
  my %parameters = %{$self->parameters_hash};

  # The analysis' own program_file wins; BINARY_LOCATION is the fallback.
  my $program = $self->analysis->program_file || $self->BINARY_LOCATION;

  my @runnable_args = (
    -query             => $self->query,
    -program           => $program,
    -analysis          => $self->analysis,
    -protein_file      => $self->PROTEIN_FILE,
    -max_intron_length => $self->MAX_INTRON_LENGTH,
    -min_coverage      => $self->MIN_COVERAGE,
  );
  return $self->runnable(
    Bio::EnsEMBL::Analysis::Runnable::Pmatch->new(@runnable_args)
  );
}
get_adaptordescriptionprevnextTop
# Returns the ProteinAlignFeatureAdaptor of the database adaptor that
# get_dbadaptor yields for the OUTPUT_DB setting.
sub get_adaptor {
  my $self = shift;
  return $self->get_dbadaptor($self->OUTPUT_DB)
              ->get_ProteinAlignFeatureAdaptor;
}
newdescriptionprevnextTop
# Constructor: delegates object creation to the parent class, then loads
# and checks the Pmatch configuration before returning the object.
#
# Args      : passed unchanged to the parent constructor
# Returntype: the newly constructed object
sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);
  $self->read_and_check_config($PMATCH_BY_LOGIC);
  return $self;
}
read_and_check_configdescriptionprevnextTop
# Loads the Pmatch configuration through the parent class and then
# verifies that the mandatory settings are present.
#
# Any argument passed by the caller is ignored; the imported
# $PMATCH_BY_LOGIC is always what gets handed to the parent method.
#
# Exceptions: throws if PROTEIN_FILE or OUTPUT_DB is undefined once the
#             parent method has run
sub read_and_check_config {
  my $self = shift;

  $self->SUPER::read_and_check_config($PMATCH_BY_LOGIC);

  #######
  #CHECKS
  #######
  # Each mandatory setting is read back through its accessor method.
  foreach my $config_var (qw(PROTEIN_FILE OUTPUT_DB)){
    throw("You must define $config_var in config for logic '".
          $self->analysis->logic_name."'")
      if not defined $self->$config_var;
  }
}
write_outputdescriptionprevnextTop
# Stores the features returned by $self->output through the adaptor from
# get_adaptor, skipping repeats.
#
# Every feature is given this object's analysis, and the query as its
# slice when it has none. Two features count as the same when start,
# end, score and hseqname all match; only the first is validated and
# stored.
#
# Returntype: 1
# Exceptions: throws if storing a feature dies
sub write_output {
  my $self = shift;

  my $adaptor = $self->get_adaptor;
  my %seen;

  FEATURE:
  for my $hit (@{$self->output}) {
    $hit->analysis($self->analysis);
    unless ($hit->slice) {
      $hit->slice($self->query);
    }

    # Key used to recognise a feature that has already been written.
    my $key = $hit->start." ".$hit->end." ".$hit->score." ".$hit->hseqname;
    next FEATURE if $seen{$key};
    $seen{$key} = 1;

    $self->feature_factory->validate($hit);

    eval {
      $adaptor->store($hit);
    };
    throw("RunnableDB:store failed, failed to write ".$hit." to ".
          "the database ".$adaptor->dbc->dbname." $@")
      if $@;
  }

  return 1;
}


# A module file must end by returning a true value when it is loaded.
1;
General documentation
CONTACTTop
Email ensembl-dev@ebi.ac.uk for questions.
APPENDIXTop
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _