# Raw content of Bio::EnsEMBL::Analysis::RunnableDB::Pmatch

=pod

=head1 NAME

Bio::EnsEMBL::Analysis::RunnableDB::Pmatch - aligns protein sequence to genomic sequence

=head1 DESCRIPTION

Pmatch is a fast alignment program written by Richard Durbin that we use
to align species-specific proteins to the genome. (We also use it to align
the protein sets of very closely related species, e.g. Mouse to Rat or Fugu
to Tetraodon.)

The pmatch source code is available from the Sanger CVS repository in module
rd-utils (http://cvs.sanger.ac.uk/cgi-bin/viewvc.cgi/rd-utils/?root=ensembl).

The code to run this process in the ensembl code base can be found in
2 RunnableDBs and a config file:
Bio::EnsEMBL::Analysis::RunnableDB::Pmatch,
Bio::EnsEMBL::Analysis::RunnableDB::BestPmatch and
Bio::EnsEMBL::Analysis::Config::GeneBuild::Pmatch

=head1 CONTACT

Email ensembl-dev@ebi.ac.uk for questions.

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

package Bio::EnsEMBL::Analysis::RunnableDB::Pmatch;

use vars qw(@ISA);
use strict;
use warnings;

use Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild;
use Bio::EnsEMBL::Analysis::Config::GeneBuild::Pmatch qw(PMATCH_BY_LOGIC);
use Bio::EnsEMBL::Analysis::Runnable::Pmatch;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw (rearrange);

@ISA = qw ( Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild );


=head2 new

  Arg [1]   : class name
  Arg [...] : arguments, passed unchanged to the parent constructor
  Function  : builds the object through SUPER::new and then runs
              read_and_check_config
  Returntype: Bio::EnsEMBL::Analysis::RunnableDB::Pmatch
  Exceptions: whatever the parent constructor or read_and_check_config throw

=cut

sub new {
  my ($class, @args) = @_;
  my $self = $class->SUPER::new(@args);

  # The override of read_and_check_config in this package ignores its
  # argument and always reads $PMATCH_BY_LOGIC; the argument is kept so the
  # call looks the same as before.
  $self->read_and_check_config($PMATCH_BY_LOGIC);
  return $self;
}


=head2 fetch_input

  Arg [1]   : none
  Function  : fetches the sequence for input_id (using the REPEAT_MASKING
              setting), stores it as the query and registers a
              Bio::EnsEMBL::Analysis::Runnable::Pmatch built from the
              analysis and the config accessors of this object
  Returntype: whatever $self->runnable returns
  Exceptions: none thrown here directly

=cut

sub fetch_input {
  my ($self) = @_;

  my $slice = $self->fetch_sequence($self->input_id, $self->db,
                                    $self->REPEAT_MASKING);
  $self->query($slice);

  # NOTE(review): the parsed analysis parameters are not forwarded to the
  # runnable. The call is kept because it cannot be told from this file
  # whether parameters_hash has side effects - confirm before removing.
  my %parameters = %{$self->parameters_hash};

  # The program named on the analysis wins; BINARY_LOCATION is the fallback.
  my $program = $self->analysis->program_file;
  $program = $self->BINARY_LOCATION if (!$program);

  my $runnable = Bio::EnsEMBL::Analysis::Runnable::Pmatch->new(
    -query             => $self->query,
    -program           => $program,
    -analysis          => $self->analysis,
    -protein_file      => $self->PROTEIN_FILE,
    -max_intron_length => $self->MAX_INTRON_LENGTH,
    -min_coverage      => $self->MIN_COVERAGE,
  );
  return $self->runnable($runnable);
}


=head2 PROTEIN_FILE, MIN_COVERAGE, BINARY_LOCATION, MAX_INTRON_LENGTH, OUTPUT_DB

  Arg [1]   : (optional) new value
  Function  : combined getter/setter. The value is only stored when the
              argument is true, so undef, 0 and the empty string leave the
              current value untouched.
              (Presumably these are filled in from PMATCH_BY_LOGIC by the
              parent read_and_check_config - confirm against the parent.)
  Returntype: the stored value, or undef if none was ever set
  Exceptions: none

=cut

sub PROTEIN_FILE {
  my ($self, $arg) = @_;
  if ($arg) {
    $self->{'PROTEIN_FILE'} = $arg;
  }
  return $self->{'PROTEIN_FILE'};
}

sub MIN_COVERAGE {
  my ($self, $arg) = @_;
  if ($arg) {
    $self->{'MIN_COVERAGE'} = $arg;
  }
  return $self->{'MIN_COVERAGE'};
}

sub BINARY_LOCATION {
  my ($self, $arg) = @_;
  if ($arg) {
    $self->{'BINARY_LOCATION'} = $arg;
  }
  return $self->{'BINARY_LOCATION'};
}

sub MAX_INTRON_LENGTH {
  my ($self, $arg) = @_;

  # The storage key is 'MAX_INTRON_SIZE', not the accessor name. It is left
  # unchanged in case other code reads the hash slot directly.
  if ($arg) {
    $self->{'MAX_INTRON_SIZE'} = $arg;
  }
  return $self->{'MAX_INTRON_SIZE'};
}

sub OUTPUT_DB {
  my ($self, $arg) = @_;
  if ($arg) {
    $self->{'OUTPUT_DB'} = $arg;
  }
  return $self->{'OUTPUT_DB'};
}


=head2 REPEAT_MASKING

  Arg [1]   : (optional) array ref of logic names
  Function  : getter/setter for the repeat masking setting handed to
              fetch_sequence in fetch_input
  Returntype: array ref, or undef if none was ever set
  Exceptions: throws if a true argument is not an ARRAY reference

=cut

sub REPEAT_MASKING {
  my ($self, $arg) = @_;
  if ($arg) {
    throw("RunnableDB::Pmatch REPEAT_MASKING must be an array ref of ".
          "logic names not ".$arg)
      unless (ref($arg) eq 'ARRAY');
    $self->{'REPEAT_MASKING'} = $arg;
  }
  return $self->{'REPEAT_MASKING'};
}


=head2 read_and_check_config

  Arg [1]   : none used; anything passed is ignored
  Function  : runs the parent read_and_check_config with $PMATCH_BY_LOGIC
              and then checks that the mandatory settings are defined
  Returntype: unspecified
  Exceptions: throws if PROTEIN_FILE or OUTPUT_DB is not defined

=cut

sub read_and_check_config {
  my $self = shift;

  $self->SUPER::read_and_check_config($PMATCH_BY_LOGIC);

  #######
  #CHECKS
  #######
  foreach my $config_var (qw(PROTEIN_FILE OUTPUT_DB)) {
    throw("You must define $config_var in config for logic '".
          $self->analysis->logic_name."'")
      if not defined $self->$config_var;
  }
}


=head2 get_adaptor

  Arg [1]   : none
  Function  : resolves OUTPUT_DB through get_dbadaptor and asks the result
              for its protein align feature adaptor
  Returntype: whatever get_ProteinAlignFeatureAdaptor returns
  Exceptions: none thrown here directly

=cut

sub get_adaptor {
  my ($self) = @_;
  my $output_db = $self->get_dbadaptor($self->OUTPUT_DB);
  return $output_db->get_ProteinAlignFeatureAdaptor;
}


=head2 write_output

  Arg [1]   : none
  Function  : stores every feature in $self->output through the adaptor
              from get_adaptor. Each feature gets this object's analysis,
              gets the query as its slice if it has none, and is validated
              by the feature factory first. Features repeating the same
              start, end, score and hseqname as an earlier one are skipped.
  Returntype: 1
  Exceptions: throws if storing a feature fails

=cut

sub write_output {
  my ($self) = @_;

  my $adaptor = $self->get_adaptor;
  my %unique;

 FEATURE:
  foreach my $feature (@{$self->output}) {
    $feature->analysis($self->analysis);
    $feature->slice($self->query) if (!$feature->slice);

    my $unique_string = $feature->start." ".$feature->end." ".
      $feature->score." ".$feature->hseqname;
    next FEATURE if ($unique{$unique_string});
    $unique{$unique_string} = 1;

    $self->feature_factory->validate($feature);

    eval {
      $adaptor->store($feature);
    };
    # Copy $@ straight away: the method calls used to build the message
    # below could run an eval of their own and reset it.
    my $error = $@;
    if ($error) {
      throw("RunnableDB:store failed, failed to write ".$feature." to ".
            "the database ".$adaptor->dbc->dbname." $error");
    }
  }
  return 1;
}

1;