Bio::EnsEMBL::Analysis::Runnable::Finished
HalfwiseHMM
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $obj = Bio::EnsEMBL::Analysis::Runnable::Finished::HalfwiseHMM->new(
-genomic => $seq,
-features => \@features,
);
$obj->run
my @newfeatures = $obj->output;
or something similar????
Description
Finds which pfam domains a particular swissprot hit matches, finds the related hmms and runs genewiseHmm
Methods
Methods description
Arg : reference to array of features Function : get/set method for input features Exception: throws an exception if not passed an array ref Caller : |
Arg : array ref to array of output features Function : adds the array of features to the output feature array Exception: throws an exception if not passed an array ref Caller : |
Arg : none Function : returns all input features Exception: none Caller : |
Arg : reference to pfam_id hash and directory if it is to be set to anything other than temp Function : creates a hmm databases of pfam ids, then creates and runs the GenewiseHmm runnable Exception: Caller : |
Arg : dbm filename Function : gets/sets dbm filename Exception: none Caller : |
Arg : the strand of the hit and the memory to be used by genewisehmm Function : runs the new method of GenewiseHmm and returns the runnable Exception: none Caller : |
Arg : id of hmm to be fetched normally a pfam id Function : runs hmmfetch on given id Exception: thows if no hmmfile is created Caller : |
Arg : reference to a hash of swissprotids and stands Function : gets all pfam ids for a particular swissprot id and puts them in a hash along with the strand the swissprot id is found on and returns its Exception: warns if swissprot id has no pfam domains Caller : |
Arg : none Function : gets swissprot ids and strands from input features and returns a hash of these Exception: none Caller : |
Arg : location of hmmdb Function : gets/sets location of hmmdb Exception: none Caller : |
Arg : location of hmmfetch program Function : gets/sets location of hmmfetch program Exception: none Caller : |
Arg : hmm filename Function : gets/sets hmmfilename Exception: none Caller : |
Arg : value memory to be set to, this is an option of how much memory genewise can use when it is run Function : gets/sets memory Exception: none Caller : |
Arg : Bio:Seq obj, reference to array of uniprot features, location of hmmfetch program, location of hmmdb, location of dbm index Function : Make a new HalfwiseHMM object defining the above variables Exception: Will throw an exception if no genomic sequence is defined or no features are passed Caller : Example : $halfwise = Bio::EnsEMBL::Analysis::Runnable::HalfwiseHMM->new(genomic => $seq features => \@features); |
Arg : none Function : returns the array of output features Exception: none Caller : |
Arg : Bio:Seq object Function : get/set method for query Exception: throws an exception if not passed a Bio:PrimarySeq Caller : |
Arg : directory if to be set to anything other than tmp Function : runs functions which run genewisehmm Exception: none Caller : |
Arg : Function : Exception : Caller : $self->create_genewisehmm_complete, $self->create_genewisehmm_individually |
Methods code
sub add_input_features
{ my ($self, $features) = @_;
if (ref($features) eq "ARRAY") {
push(@{$self->{'_input_features'}},@$features);
} else {
throw("[$features] is not an array ref.");
} } |
sub add_output_features
{ my ($self, $features) = @_;
if (ref($features) eq "ARRAY") {
push(@{$self->{'_output_features'}},@$features);
} else {
throw("[$features] is not an array ref.");
}
}
} |
sub all_input_features
{
my ($self) = @_;
return @{$self->{'_input_features'}};
}
} |
sub analysis
{ my ( $self, $analysis ) = @_;
if ($analysis) {
$self->{'_analysis'} = $analysis;
}
return $self->{'_analysis'}; } |
sub create_genewisehmm_complete
{ my ($self, $pfam_ids) = @_;
print STDERR "getting hmms\n"; print STDERR "\nthere are ".scalar(keys(%$pfam_ids))." pfam ids in database\n";
return unless scalar(keys(%$pfam_ids));
print STDERR "doing the hmm for ids: ". join(" ", keys(%$pfam_ids)) . "\n";
$self->run_genewisehmm(0);
print STDERR "removing hmmfile: ".$self->hmmfilename()."\n"; unlink $self->hmmfilename();
return undef; } |
sub create_genewisehmm_individually
{ my ($self, $pfam_ids, $dir) = @_;
print STDERR "getting hmms\n"; $self->workdir('/tmp') unless ($self->workdir($dir));
$self->checkdir();
my $memory = $self->memory;
print STDERR "there are ".scalar(keys(%$pfam_ids))." pfam ids to run\n";
while(my ($pfam_id, $strand) = each(%$pfam_ids)){
print STDERR "doing the hmm for id: $pfam_id\n"; $self->get_hmm($pfam_id);
if (-z $self->hmmfilename){
warning("hmm file not created :$!");
next;
}
my @strands = ();
if($strand == 1){
push(@strands, $strand);
}elsif($strand == -1){
push(@strands, $strand);
}elsif($strand == 0){
push(@strands, -1);
push(@strands, 1);
}else{
throw("strand = ".$strand." something funnies going on : $!\n");
}
foreach my $s (@strands){
$self->run_genewisehmm($s);
}
print STDERR "removing hmmfile: ".$self->hmmfilename()."\n"; unlink $self->hmmfilename();
}
throw("finished this bit");
return undef; } |
sub dbm_file
{ my ($self, $args) = @_;
if (defined ($args)){
$self->{'_dbm_file'} = $args;
}
return $self->{'_dbm_file'}; } |
sub display_genes
{ my ($self, $result) = @_;
my @results = @$result;
foreach my $obj (@results)
{
print STDERR ("gene: ".$obj->gffstring. "\n");
if ($obj->sub_SeqFeature)
{
foreach my $exon ($obj->sub_SeqFeature)
{
print STDERR "Exon: ".$exon->gffstring."\n";
if ($exon->sub_SeqFeature)
{
foreach my $sub ($exon->sub_SeqFeature){
print STDERR "supporting features: ".$sub->gffstring."\n";
}
}
}
}
}
}
1; } |
sub get_GenewiseHMM
{ my ($self, $strand, $memory) = @_;
my $reverse = ($strand == -1 ? 1 : undef);
print STDERR "creating genewisehmm strand $strand reverse $reverse\n"; print STDERR "OPTIONS To Genewise: ".$self->options()."\n";
$ENV{WISECONFIGDIR} = "/usr/local/ensembl/data/wisecfg/";
my $genewisehmm =
Bio::EnsEMBL::Analysis::Runnable::Finished::GenewiseHmm->new('-query' => $self->query(),
'-memory' => $memory,
'-hmmfile' => $self->hmmfilename(),
'-reverse' => $reverse,
'-genewise' => $self->program(),
'-options' => $self->options(),
'-analysis' => $self->analysis(),
);
return $genewisehmm; } |
sub get_hmm
{ my ($self, $id) = @_;
$self->hmmfilename($id.".".$$.".hmm");
my $command = $self->hmmfetch." ".$self->hmmdb." ".$id." > ".$self->hmmfilename;
eval{
system($command);
};
if($@){
warning("hmmfetch threw error : $@\n");
}
if (-z $self->hmmfilename){
warning("hmm file not created :$!");
}elsif(-e $self->hmmfilename){
open(ERROR, $self->hmmfilename) or die "couldn't open error file : $!";
while(<ERROR>){
if(/no such hmm/i){
print STDERR "error message : ".$_."\n";
}
}
close(ERROR) or die "couldn't close error file : $!";
} } |
sub get_hmmdb
{
my ($self, $pfam_ids) = @_;
my @pfamIds = keys(%$pfam_ids);
print STDERR "getting the hmms for ids: ". join(" ", @pfamIds) . "\n"; my $filename = substr(join("_", @pfamIds),0,20);
$self->hmmfilename("$filename.$$.hmmdb");
foreach my $id(@pfamIds){
print "getting hmm for id ".$id."\n";
my $command = $self->hmmfetch." ".$self->hmmdb." ".$id." >> ".$self->hmmfilename;
eval{
system($command);
};
if($@){
warning("hmmfetch threw error : $@\n");
}
if (-z $self->hmmfilename){
warning("hmm file not created :$!");
}elsif(-e $self->hmmfilename){
open(ERROR, $self->hmmfilename) or die "couldn't open error file : $!";
while(<ERROR>){
if(/no such hmm/i){
print STDERR "error message : ".$_."\n";
}
}
close(ERROR) or die "couldn't close error file : $!";
}
} } |
sub get_pfam_hmm
{ my ($self, $uniprot_ids, $dir) = @_;
my $pfam_accs;
my $pfam_lookup;
my $db = $self->pfamDB();
$self->workdir('/tmp') unless($self->workdir($dir));
$self->checkdir();
my $sql = "SELECT
DISTINCT(CONCAT(a.pfamA_acc,'.',a.version)),
a.pfamA_id,
a.description,
ls.hmm_ls
FROM
pfamA_reg_full_significant f,
pfamseq p,
pfamA a,
pfamA_HMM_ls ls
WHERE
f.auto_pfamseq = p.auto_pfamseq AND
f.in_full = 1 AND
a.auto_pfamA = f.auto_pfamA AND
ls.auto_pfamA = f.auto_pfamA AND
p.pfamseq_acc IN ";
my @accessions = keys(%$uniprot_ids);
map ( s/(\w*(-\d+)?)(\.\d+)?/'$1'/,@accessions);
next unless defined @accessions;
$sql .= "(".join(',',@accessions).")";
$self->hmmfilename("halfwise.$$.hmmdb");
open (HMMFILE, ">".$self->hmmfilename) or die "couldn't create hmm file ".$self->hmmfilename." $!";
my $sth = $db->prepare($sql);
$sth->execute();
my ($pfam_acc, $pfam_id, $description, $hmm_ls);
$sth->bind_columns(\($pfam_acc, $pfam_id, $description, $hmm_ls));
while(my $row = $sth->fetchrow_arrayref()){
$pfam_lookup->{$pfam_id} = [$pfam_acc, $description];
$pfam_accs->{$pfam_acc} = 1;
print HMMFILE $hmm_ls;
}
$sth->finish();
close HMMFILE;
$self->pfam_lookup($pfam_lookup);
return $pfam_accs; } |
sub get_pfam_ids
{
my ($self, $swissprot_ids) = @_;
my $pfam_accs;
my $pfam_lookup;
my $tbl_name = "pipeline_tmp_$$";
my $create_table = qq{CREATE TEMPORARY TABLE $tbl_name(
pfamseq_id varchar(12) NOT NULL PRIMARY KEY,
strand enum('1','0','-1') DEFAULT '0'
)TYPE = HEAP
}; my $db = $self->pfamDB();
my $sth = $db->prepare($create_table);
$sth->execute();
$sth->finish();
my (@binds, @values);
my $sql = qq{INSERT IGNORE INTO $tbl_name (pfamseq_id, strand) VALUES };
while (my ($swiss_id, $strand) = each(%$swissprot_ids)){
$swiss_id =~ s/\.\d+//;
push(@binds, $swiss_id, $strand);
push(@values, qq{ (?, ?)});
}
if(scalar(@values)){
$sql .= join(", ", @values);
$sth = $db->prepare($sql);
$sth->execute(@binds);
$sth->finish();
}
my $select = qq{SELECT CONCAT(a.pfamA_acc,".",a.version), t.strand, t.pfamseq_id, a.pfamA_id, a.description
FROM pfamA_reg_full f, pfamseq p, $tbl_name t, pfamA a
WHERE f.auto_pfamseq = p.auto_pfamseq
&& p.pfamseq_acc = t.pfamseq_id
&& f.in_full = 1
&& a.auto_pfamA = f.auto_pfamA;};
$sth = $db->prepare($select);
$sth->execute();
my ($pfam_acc, $strand, $swall, $pfam_id, $description);
$sth->bind_columns(\($pfam_acc, $strand, $swall, $pfam_id, $description));
while(my $row = $sth->fetchrow_arrayref()){
print STDERR "$swall : $strand : $pfam_acc : $pfam_id\n";
$pfam_lookup->{$pfam_id} = [$pfam_acc, $description];
if(defined($pfam_accs->{$pfam_acc})){
$pfam_accs->{$pfam_acc} = ($strand eq $pfam_accs->{$pfam_acc} ? $strand : 0);
}else{
$pfam_accs->{$pfam_acc} = $strand;
}
print "\n pfam_acc: $pfam_acc, strand: $strand, uniprot: $swall,pfamid: $pfam_id,description: $description";
}
$self->pfam_lookup($pfam_lookup); return $pfam_accs;
}
} |
sub get_swissprot_ids
{ my ($self) = @_;
my @features = $self->all_input_features();
my $swissprot_ids; foreach my $f(@features){
my $hname = $f->hseqname();
my $strand = $f->strand();
if(!defined($swissprot_ids->{$hname})){
$swissprot_ids->{$hname} = $strand;
}else{
$swissprot_ids->{$hname} = ($strand eq $swissprot_ids->{$hname} ? $strand : 0);
}
}
return $swissprot_ids; } |
sub hmmdb
{ my ($self, $args) =@_;
if (defined($args)){
$self->{'_hmmdb'} = $args;
}
return $self->{'_hmmdb'}; } |
sub hmmfetch
{ my ($self, $args) =@_;
if (defined($args)){
$self->{'_hmmfetch'} = $args;
}
return $self->{'_hmmfetch'}; } |
sub hmmfilename
{ my ($self, $args) = @_;
if (defined ($args)){
$self->{'_hmmfilename'} = $args;
}
return $self->{'_hmmfilename'}; } |
sub memory
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_memory'} = $arg;
}
return $self->{'_memory'} || 400000;
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->{'_query'} = undef;
$self->{'_input_features'} = [];
$self->{'_output_features'} = [];
$self->{'_hmmfetch'} = undef;
$self->{'_hmmdb'} = undef;
$self->{'_hmmfilename'} = undef; $self->{'_errorfile'} = undef; $self->{'_dbm_file'} = undef;
my ($genomic, $features, $hmmfetch, $hmmdb, $pfamdb, $memory, $options, $analysis,$program) = rearrange([qw(QUERY
FEATURES
HMMFETCH
HMMDB
PFAMDB
MEMORY
OPTIONS
ANALYSIS
PROGRAM)], @args);
throw("No genomic sequence input") unless defined($genomic);
$self->query($genomic);
throw("No features input") unless defined($features);
$self->add_input_features($features);
throw("No pfam database") unless $pfamdb;
$self->pfamDB($pfamdb);
$self->options($options);
$self->program($program);
throw("No genomic sequence input") unless defined($analysis);
$self->analysis($analysis);
if ($hmmfetch){
$self->hmmfetch($hmmfetch);
} else {
$self->hmmfetch('/usr/local/ensembl/bin/hmmfetch');
}
if($hmmdb){
$self->hmmdb($hmmdb);
} else {
$self->hmmdb('/data/blastdb/Finished/Pfam_ls');
}
$self->memory ($memory) if (defined($memory));
return $self; }
} |
sub output
{ my ($self) = @_;
return @{$self->{'_output_features'}}; } |
sub pfamDB
{ my ($self, $dbobj) = @_;
$self->{'_pfamDB'} = $dbobj if $dbobj;
throw("Not a Bio::EnsEMBL::DBSQL::DBAdaptor")
unless $self->{'_pfamDB'}->isa("Bio::EnsEMBL::DBSQL::DBConnection");
return $self->{'_pfamDB'}; } |
sub pfam_db_version
{ my ($self) = @_;
unless($self->{'_pfam_db_version'}){
my $db = $self->pfamDB();
$self->{'_pfam_db_version'} = $db->get_meta_value_by_key('version');
}
return $self->{'_pfam_db_version'}; } |
sub pfam_lookup
{ my ($self, $hash_ref) = @_;
if(ref($hash_ref) eq 'HASH'){
$self->{'_pfam_lookup'} ||= {};
$self->{'_pfam_lookup'} = { %{$self->{'_pfam_lookup'}}, %{$hash_ref} };
}
return $self->{'_pfam_lookup'}; } |
sub pfam_ls_version
{ my ($self) = @_;
unless($self->{'_pfam_ls_version'}){
my $ver = BlastableVersion->new($self->hmmdb);
$self->{'_pfam_ls_version'} = $ver->get_version($self->hmmdb);
}
return $self->{'_pfam_ls_version'}; } |
sub program
{ my ($self, $program) = @_;
$self->{'_program'} = $program if $program;
return $self->{'_program'}; } |
sub query
{ my( $self, $value ) = @_;
if ($value) {
$value->isa("Bio::PrimarySeqI") || throw("Input isn't a Bio::PrimarySeqI");
$self->{'_query'} = $value;
}
return $self->{'_query'}; } |
sub run
{
my ($self, $dir) = @_;
my $swissprot_ids = $self->get_swissprot_ids();
my $pfam_ids = $self->get_pfam_hmm($swissprot_ids,$dir);
$self->create_genewisehmm_complete($pfam_ids); } |
sub run_genewisehmm
{ my ($self, $strand) = @_;
my $memory = $self->memory;
print STDERR "running on strand: $strand\n"; my $genewisehmm = $self->get_GenewiseHMM($strand, $memory);
$genewisehmm->run();
my @features = $genewisehmm->output();
$self->display_genes(\@features); print STDERR "adding ".scalar(@features)." to output\n"; $self->add_output_features(\@features); } |
General documentation
rds@sanger.ac.uk
refactored by Sindhu K. Pillai sp1@sanger.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _