Bio::EnsEMBL::Compara::RunnableDB
GenomeDumpFasta
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::GenomeDumpFasta
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $db = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new($locator);
my $repmask = Bio::EnsEMBL::Compara::RunnableDB::GenomeDumpFasta->new (
-db => $db,
-input_id => $input_id,
-analysis => $analysis );
$repmask->fetch_input(); #reads from DB
$repmask->run();
$repmask->output();
$repmask->write_output(); #writes to DB
Description
This RunnableDB takes a genome_db as input and creates a blast database in a shared directory
and creates a corresponding blast_<genome> analysis off of the blast_template analysis.
The new genome specific analysis is given a logic name like blast_1_NCBI35.
Methods
createBlastAnalysis | No description | Code |
dumpPeptidesToFasta | No description | Code |
fetch_input | Description | Code |
getSubsetIdForGenomeDBId | No description | Code |
parameter_hash | No description | Code |
run | No description | Code |
updateBlastAnalysis | No description | Code |
write_output | No description | Code |
Methods description
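Title : fetch_input Usage : $self->fetch_input Function: Parses the input_id hash, fetches the genome_db and peptide subset it refers to from the compara database, and derives the logic name of the blast analysis to create Returns : 1 Args : none |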
Methods code
createBlastAnalysis | description | prev | next | Top |
# createBlastAnalysis
#
# Creates and stores a blast analysis for the current peptide subset, copying
# program/module settings from the 'blast_template' analysis, then sets the
# batch size and hive capacity on the new analysis' stats row.
#
# Args    : $blastdb - object whose sequence_file() returns the path of the
#                      dumped fasta file (as returned by dumpPeptidesToFasta)
# Returns : the stored analysis object
# Throws  : via $self->throw when the 'blast_template' analysis or the
#           analysis stats row cannot be fetched
sub createBlastAnalysis {
    my $self    = shift;
    my $blastdb = shift;

    my $blast_template = $self->db->get_AnalysisAdaptor->fetch_by_logic_name('blast_template');
    # Fail with a clear message instead of "Can't call method on undef" below.
    $self->throw("blast_template analysis has not been created") unless ($blast_template);

    my %fasta_dump_parameters = $self->parameter_hash($self->analysis->parameters());

    # The parameters are assembled as the text of a Perl hash literal.
    my $params = "{subset_id=>" . $self->{'pepSubset'}->dbID;
    $params .= ",genome_db_id=>" . $self->{'genome_db'}->dbID if ($self->{'genome_db'});

    if ($blast_template->parameters()) {
        # NOTE(review): string eval of text stored in the database; this is
        # only safe as long as the analysis tables are trusted.
        my $parmhash = eval($blast_template->parameters);
        warn("createBlastAnalysis: could not parse blast_template parameters: $@") if ($@);
        $parmhash = {} unless (ref($parmhash) eq 'HASH');

        # The template may point at an analysis_data row holding the real
        # parameter hash; when that row parses to a hash it replaces $parmhash.
        if (defined $parmhash->{'blast_template_analysis_data_id'}) {
            my $analysis_data_id = $parmhash->{'blast_template_analysis_data_id'};
            my $stored = $self->db->get_AnalysisDataAdaptor->fetch_by_dbID($analysis_data_id);
            my $new_params = defined($stored) ? eval($stored) : undef;
            warn("createBlastAnalysis: could not parse analysis_data $analysis_data_id: $@") if ($@);
            $parmhash = $new_params if (ref($new_params) eq 'HASH');
        }

        if ($parmhash->{'null_cigar'}) {
            $params .= ",null_cigar=>'" . $parmhash->{'null_cigar'} . "'";
        }
        if ($parmhash->{'reuse_db'}) {
            $params .= ",reuse_db=>'" . $parmhash->{'reuse_db'} . "'";
        }
        # Only dereference reuse_gdb when it really is an array reference.
        if (ref($parmhash->{'reuse_gdb'}) eq 'ARRAY') {
            $params .= ",reuse_gdb=>" . "[" . join(",", @{ $parmhash->{'reuse_gdb'} }) . "]";
        }
        if ($parmhash->{'options'}) {
            $params .= ",options=>'" . $parmhash->{'options'} . "'";
        }
    }
    $params .= '}';
    print("createBlastAnalysis\n params = $params\n");

    # The analysis 'db' is the file name of the fasta file without its directory.
    my $seq_name = $blastdb->sequence_file;
    my ($dbname) = $seq_name =~ /([^\/]+)$/;

    my $analysis = Bio::EnsEMBL::Analysis->new(
        -db              => $dbname,
        -db_file         => $seq_name,
        -db_version      => '1',
        -logic_name      => $self->{'logic_name'},
        -program         => $blast_template->program(),
        -program_file    => $blast_template->program_file(),
        -program_version => $blast_template->program_version(),
        -module          => $blast_template->module(),
        -parameters      => $params,
    );

    # When the parameter string is stored as analysis_data, the analysis only
    # carries a pointer to that row.
    my $blast_analysis_data_id = $self->db->get_AnalysisDataAdaptor->store_if_needed($params);
    if (defined $blast_analysis_data_id) {
        $analysis->parameters("{'analysis_data_id'=>'$blast_analysis_data_id'}");
    }

    # store() is followed by update(); presumably so that an already existing
    # row with this logic_name is refreshed -- TODO confirm.
    $self->db->get_AnalysisAdaptor()->store($analysis);
    $self->db->get_AnalysisAdaptor()->update($analysis);

    my $stats = $self->db->get_AnalysisStatsAdaptor->fetch_by_analysis_id($analysis->dbID);
    $self->throw("no analysis stats found for analysis " . $self->{'logic_name'}) unless ($stats);

    $stats->batch_size(40);
    # 'blast_hive_capacity' comes from this analysis' own parameters; 450 is
    # the fallback when it is not given.
    my $hive_capacity = $fasta_dump_parameters{'blast_hive_capacity'};
    $hive_capacity = 450 unless (defined $hive_capacity);
    $stats->hive_capacity($hive_capacity);
    $stats->update();

    return $analysis;
}
# dumpPeptidesToFasta
#
# Dumps the peptides of $self->{'pepSubset'} into a fasta file below the
# 'fasta_dir' given in the analysis parameters and builds a protein blast
# database from that file.
#
# The file is named <genome name>_<assembly>.fasta when a genome_db is set,
# otherwise <logic_name>.fasta; whitespace in the path is replaced by '_'.
#
# Returns : the Bio::EnsEMBL::Analysis::Tools::BlastDB object
# Throws  : via $self->throw when no fasta_dir parameter is available
sub dumpPeptidesToFasta {
    my $self = shift;

    my %parameters = $self->parameter_hash($self->analysis->parameters());
    my $fasta_dir  = $parameters{'fasta_dir'};
    # Without a directory the file would end up directly under '/';
    # '__NONE__' is what parameter_hash stores for a key without a value.
    $self->throw("No fasta_dir defined in the analysis parameters")
        unless (defined($fasta_dir) && $fasta_dir ne '' && $fasta_dir ne '__NONE__');
    printf("fasta_dir = %s\n", $fasta_dir);

    my $fastafile = $fasta_dir . "/";
    if ($self->{'genome_db'}) {
        $fastafile .= $self->{'genome_db'}->name() . "_" .
                      $self->{'genome_db'}->assembly() . ".fasta";
    }
    else {
        $fastafile .= $self->{'logic_name'} . ".fasta";
    }
    $fastafile =~ s/\s+/_/g;      # no whitespace in the file name
    $fastafile =~ s/\/\//\//g;    # collapse '//' left by a trailing '/' on fasta_dir
    print("fastafile = '$fastafile'\n");

    $self->{'comparaDBA'}->get_SubsetAdaptor->dumpFastaForSubset($self->{'pepSubset'}, $fastafile);

    my $blastdb = Bio::EnsEMBL::Analysis::Tools::BlastDB->new(
        -sequence_file => $fastafile,
        -mol_type      => "PROTEIN",
    );
    $blastdb->create_blastdb;

    my $seq_name = $blastdb->sequence_file;
    my ($dbname) = $seq_name =~ /([^\/]+)$/;
    print("registered " . $dbname . " for " . $blastdb->sequence_file . "\n");

    return $blastdb;
}
# fetch_input
#
# Parses the input_id (the text of a Perl hash with optional keys 'gdb' and
# 'ss') and prepares the state used by the other methods:
#   $self->{'comparaDBA'} - compara adaptor built on this process' connection
#   $self->{'genome_db'}  - genome_db fetched for 'gdb', when given
#   $self->{'pepSubset'}  - subset fetched for 'ss', or looked up through
#                           getSubsetIdForGenomeDBId when only 'gdb' is given
#   $self->{'logic_name'} - blast_<genome_db_id>_<assembly>, or
#                           blast_<subset description> when no 'gdb' is given
#
# Returns : 1
# Throws  : via $self->throw on a missing or malformed input_id, an unknown
#           genome_db or subset, or when no subset can be determined
sub fetch_input {
    my $self = shift;

    $self->throw("No input_id") unless defined($self->input_id);
    print("input_id = " . $self->input_id . "\n");
    $self->throw("Improper formated input_id") unless ($self->input_id =~ /\{/);

    # NOTE(review): string eval of the input_id; only safe while job input
    # ids come from a trusted source.
    my $input_hash = eval($self->input_id);
    $self->throw("Improper formated input_id") unless (ref($input_hash) eq 'HASH');

    $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);

    my $genome_db_id = $input_hash->{'gdb'};
    my $subset_id    = $input_hash->{'ss'};

    $self->{'logic_name'} = undef;
    if (defined($genome_db_id)) {
        print("gdb = $genome_db_id\n");
        $self->{'genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id);
        $self->throw("no genome_db found for genome_db_id = $genome_db_id")
            unless ($self->{'genome_db'});
        $self->{'logic_name'} = "blast_" . $self->{'genome_db'}->dbID() . "_" . $self->{'genome_db'}->assembly();
        unless ($subset_id) {
            $subset_id = $self->getSubsetIdForGenomeDBId($genome_db_id);
        }
    }

    # Called as a method like the other throws in this sub.
    $self->throw("no subset defined, can't figure out which peptides to use\n")
        unless (defined($subset_id));

    $self->{'pepSubset'} = $self->{'comparaDBA'}->get_SubsetAdaptor()->fetch_by_dbID($subset_id);
    $self->throw("no subset found for subset_id = $subset_id") unless ($self->{'pepSubset'});

    # Without a genome_db the logic name is derived from the subset description.
    unless ($self->{'logic_name'}) {
        $self->{'logic_name'} = "blast_" . $self->{'pepSubset'}->description;
        $self->{'logic_name'} =~ s/\s+/_/g;
    }

    return 1;
}
# getSubsetIdForGenomeDBId
#
# Looks up the subset(s) whose description contains 'longest' and which hold
# members of the given genome_db.
#
# Args    : $genome_db_id - genome_db_id the members must belong to
# Returns : the first subset_id found, or undef when there is none; a warning
#           is issued when none or more than one subset matches
sub getSubsetIdForGenomeDBId {
    my $self         = shift;
    my $genome_db_id = shift;

    # The genome_db_id is passed as a bound value, not interpolated into the SQL.
    my $sql = "SELECT distinct subset.subset_id " .
              "FROM member, subset, subset_member " .
              "WHERE subset.subset_id=subset_member.subset_id " .
              "AND subset.description like '%longest%' " .
              "AND member.member_id=subset_member.member_id " .
              "AND member.genome_db_id=?";
    my $sth = $self->{'comparaDBA'}->prepare($sql);
    $sth->execute($genome_db_id);

    my $subset_id;
    $sth->bind_columns(\$subset_id);

    my @subsetIds = ();
    while ($sth->fetch()) {
        print("found subset_id = $subset_id for genome_db_id = $genome_db_id\n");
        push @subsetIds, $subset_id;
    }
    $sth->finish();

    if (@subsetIds > 1) {
        warn("Compara DB: more than 1 subset of longest peptides defined for genome_db_id = $genome_db_id\n");
    }
    if (@subsetIds == 0) {
        warn("Compara DB: no subset of longest peptides defined for genome_db_id = $genome_db_id\n");
    }

    return $subsetIds[0];
}
} |
# parameter_hash
#
# Parses a string of the form "key1=>value1, key2=>value2, flag" into a hash.
# Keys and values are stripped of surrounding whitespace. A key without a
# value is stored with the value "__NONE__"; pairs without a key are ignored.
# A value of 0 is kept as a real value.
#
# Args    : $parameter_string - comma separated list of key=>value pairs
# Returns : a hash (as a list); empty when the string is undefined or empty
sub parameter_hash {
    my $self             = shift;
    my $parameter_string = shift;

    my %parameters;
    return %parameters unless ($parameter_string);

    foreach my $pair (split(/,/, $parameter_string)) {
        my ($key, $value) = split(/=>/, $pair);

        # Trim both parts before deciding whether they are present, so that
        # the key is clean in the "__NONE__" case as well.
        foreach my $part ($key, $value) {
            next unless defined $part;
            $part =~ s/^\s+//;
            $part =~ s/\s+$//;
        }

        next unless (defined($key) && $key ne '');

        # Test for presence, not truth: "0" is a legitimate value.
        if (defined($value) && $value ne '') {
            $parameters{$key} = $value;
        }
        else {
            $parameters{$key} = "__NONE__";
        }
    }

    return %parameters;
}
1; } |
# run
#
# Nothing is done in this phase; the fasta dump and the creation of the blast
# analysis both happen in write_output.
#
# Returns : 1
sub run {
    my ($self) = @_;

    return 1;
}
# updateBlastAnalysis
#
# Points the already stored blast analysis named $self->{'logic_name'} at the
# given blast database and updates it in the database.
#
# The db name and db file are derived from $blastdb->sequence_file in the same
# way as in createBlastAnalysis: db_file is the full path, db is the file name
# without its directory.
#
# Args    : $blastdb - object whose sequence_file() returns the fasta file path
# Returns : the updated analysis object
# Throws  : via $self->throw when the analysis does not exist yet
sub updateBlastAnalysis {
    my $self    = shift;
    my $blastdb = shift;

    my $logic_name = $self->{'logic_name'};
    print("UPDATE the blastDB for analysis $logic_name\n");

    my $analysis_adaptor = $self->db->get_AnalysisAdaptor();
    my $blast_analysis   = $analysis_adaptor->fetch_by_logic_name($logic_name);
    $self->throw("$logic_name analysis has not been created") unless ($blast_analysis);

    my $seq_name = $blastdb->sequence_file;
    my ($dbname) = $seq_name =~ /([^\/]+)$/;

    $blast_analysis->db($dbname);
    $blast_analysis->db_file($seq_name);
    $blast_analysis->db_version(1);
    $analysis_adaptor->update($blast_analysis);

    return $blast_analysis;
}
} |
# write_output
#
# Dumps the peptides to a fasta file / blast database and then creates the
# blast analysis that uses it.
#
# Returns : 1
sub write_output {
    my ($self) = @_;

    my $fasta_blastdb = $self->dumpPeptidesToFasta();
    $self->createBlastAnalysis($fasta_blastdb);

    return 1;
}
} |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _