Bio::EnsEMBL::Analysis::Runnable
PSILC
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::PSILC;
Package variables
No package variables defined.
Included modules
Bio::Align::Utilities qw ( aa_to_dna_aln )
Bio::EnsEMBL::Analysis::Config::Pseudogene
Bio::Tools::Run::Alignment::Clustalw
Inherit
Synopsis
my $PSILC = Bio::EnsEMBL::Analysis::Runnable::PSILC->new
(
'-trans' => $transcript,
'-homologs' => $homologs,
'-analysis' => $analysis,
'-domain' => $domains,
'-input_id' => $self->input_id,
);
$runnabledb->fetch_input();
$runnabledb->run();
my @array = @{$runnabledb->output};
$runnabledb->write_output();
Description
Runnable for PSILC
Methods
Methods description
Arg [1] : array ref Description: get/set pfam domain identifiers Returntype : string of pfam identifiers Exceptions : none Caller : general |
Arg [1] : scalar - filename Description: marks the PSILC output files for deletion Returntype : none Exceptions : none Caller : general |
Arg [1] : array ref Description: get/set gene set to run over Returntype : array ref to Bio::EnsEMBL::Gene objects Exceptions : throws if not a Bio::EnsEMBL::Gene Caller : general |
Arg [1] : none Description: Makes a dna alignment based on a protein alignment using Bio::Align::Utilities DNA alignment is for the translateable part of the transcript only Returntype : none Exceptions : none Caller : general |
Arg [1] : none Description: Creates runnable Returntype : none Exceptions : none Caller : general |
Arg [1] : hash_ref Description: overrides output array Returntype : hash_ref Exceptions : none Caller : general |
Arg [1] : scalar - input id of the anlysis Description: Creates the output directory for the PSILC files Returntype : none Exceptions : none Caller : general |
Arg [1] : scalar - filename Description: parses PSILC results Returntype : none Exceptions : warns if the results are unreadable Caller : general |
Arg [1] : none Description: Makes alignment out of transcripts and runs PSILC Returntype : none Exceptions : none Caller : general |
Arg [1] : scalar - program name Description: runs the PSILC executable Returntype : none Exceptions : throws if program is not executable Caller : general |
Arg [1] : array ref Description: get/set trans set to run over Returntype : array ref to Bio::EnsEMBL::Transcript objects Exceptions : throws if not a Bio::EnsEMBL::Transcript Caller : general |
Arg [1] : none Description: Writes identifiers for PFAM domains that PSILC will search for in the protein alignment Returntype : none Exceptions : throws if it cannot open the file to write to Caller : general |
Methods code
sub domains
{ my ($self, $domains) = @_;
if ($domains) {
$self->{'_domains'} = $$domains;
}
return $self->{'_domains'}; } |
sub files_to_be_deleted
{ my ($self,$filename) = @_;
my $dir = $self->workdir;
my $domains = $self->domains;
$domains =~ s/PF.+\t//;
chomp $domains;
my @fnc = split /\./,$filename;
$self->files_to_delete($filename.".out");
$self->files_to_delete($fnc[0].".nhx");
$self->files_to_delete("$dir/pfamA");
$self->files_to_delete("$dir/PSILC_WAG_HKY/domdom");
$self->files_to_delete("$dir/PSILC_WAG_HKY/posteriorN");
$self->files_to_delete("$dir/PSILC_WAG_HKY/posteriorP");
$self->files_to_delete("$dir/PSILC_WAG_HKY/psilcN");
$self->files_to_delete("$dir/PSILC_WAG_HKY/psilcP");
$self->files_to_delete("$dir/PSILC_WAG_HKY/$domains");
return 1; } |
sub homologs
{ my ($self, $homologs) = @_;
if ($homologs) {
foreach my $hom (@{$homologs}){
unless ($hom->isa("Bio::EnsEMBL::Transcript")){
$self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $hom\n$@");
}
}
$self->{'_homologs'} = $homologs;
}
return $self->{'_homologs'}; } |
sub id
{ my ($self) = @_;
if ($self->trans->stable_id){
return $self->trans->stable_id;
}
return $self->trans->dbID; } |
sub make_codon_based_alignment
{ my ($self) = @_;
my @aaseqs;
my $aaseq;
my %dnaseqs;
my $filename = $self->queryfile.".aln";
my $alignIO = Bio::AlignIO->new
(
-file => ">$filename",
-format => "clustalw",
);
my $pep_alignIO = Bio::AlignIO->new
(
-file => ">$filename.pep",
-format => "clustalw",
);
my $clustalw = Bio::Tools::Run::Alignment::Clustalw->new();
$aaseq = $self->trans->translate;
$aaseq->display_id($self->id);
push @aaseqs,$aaseq;
$dnaseqs{$self->id} = Bio::Seq->new
(
-display_id => $self->id,
-seq => $self->trans->translateable_seq,
);
foreach my $homolog(@{$self->homologs}){
next if ($homolog->stable_id eq $self->id);
$aaseq = $homolog->translate;
$aaseq->display_id($homolog->stable_id);
push @aaseqs,$aaseq;
$dnaseqs{$homolog->stable_id} = Bio::Seq->new
(
-display_id => $homolog->stable_id,
-seq => $homolog->translateable_seq,
);
}
foreach my $seq (@aaseqs){
print $seq->display_id."\n";
}
my $alignment = $clustalw->align(\@aaseqs);
my $dna_aln = aa_to_dna_aln($alignment,\%dnaseqs);
foreach my $seq ($dna_aln->each_seq){
print $seq->display_id."\n";
}
$pep_alignIO->write_aln($alignment);
$alignIO->write_aln($dna_aln);
return 1;
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->{'_trans'} = {}; $self->{'_homologs'} = (); $self->{'_domains'} = ();
my($trans,$domains,$homologs,$input_id) = $self->_rearrange([qw(
TRANS
DOMAIN
HOMOLOGS
INPUT_ID
)], @args);
if ($trans) {
$self->trans($trans);
}
if ($homologs) {
$self->homologs($homologs);
}
if ($domains) {
$self->domains($domains);
}
$self->output_dir($input_id);
$self->program('/nfs/acari/sw4/Pseudogenes/PSILC/psilc1.21/psilc.jar');
return $self; } |
sub output
{ my ($self, $hash_ref) = @_;
if ($hash_ref) {
$self->{'_output'} = $hash_ref;
}
return $self->{'_output'};
}
1; } |
sub output_dir
{ my ($self,$input_id) = @_;
my $output_dir;
$output_dir = $self->id;
if ($PSILC_WORK_DIR){
system("mkdir $PSILC_WORK_DIR/$$input_id/$output_dir");
$self->workdir("$PSILC_WORK_DIR/$$input_id/$output_dir");
}
else{
$self->throw("Cannot make output directory\n");
}
return 1; } |
sub parse_results
{ my ($self, $filename) =@_;
my ($nuc_dom,$prot_dom,$postPMax,$postNmax) = 0;
my $dir = $self->workdir;
my $trans = $self->trans;
my $id = $self->id;
open (PSILC,"$dir/PSILC_WAG_HKY/summary") or $self->warn("Cannot open results file $dir/PSILC_WAG_HKY/summary\n$@\n");
while(<PSILC>){
chomp;
next unless $_ =~ /^$id\/\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/;
$nuc_dom = $1;
$prot_dom = $2;
$postPMax = $3;
$postNmax = $4;
last;
}
close PSILC;
unless ($nuc_dom){
$self->warn("ERROR unable to parse results file $dir/PSILC_WAG_HKY/summary\n");
$self->warn("Failed for $id");
return 0;
}
my %results = (
'id' => $id,
'prot_dom' => $prot_dom,
'nuc_dom' => $nuc_dom,
'postPMax' => $postPMax,
'postNMax' => $postNmax,
);
$self->output(\%results);
return 1; } |
sub run
{ my ($self) = @_;
$self->checkdir();
$self->write_pfam_ids;
my $filename = $self->create_filename('PSILC','fasta');
$self->make_codon_based_alignment;
$self->run_analysis();
$self->files_to_be_deleted($filename);
$self->parse_results;
} |
sub run_analysis
{ my ($self, $program) = @_;
if(!$program){
$program = $self->program;
}
throw($program." is not executable Runnable::run_analysis ")
unless($program && -x $program);
my $file = $self->queryfile;
my $dir = $self->workdir;
$file =~ s/^$dir\///;
my $command = "java -Xmx400m -jar $program";
$command .= " --align $file.aln";
$command .= " --seq ".$self->id;
$command .= " --restrict 1";
$command .= " --max_nodes 10:6";
$command .= " --repository /ecs2/work2/sw4/PFAM";
$command .= " > ".$self->resultsfile;
print "Running analysis ".$command."\n";
system($command) == 0 or $self->throw("FAILED to run ".$command); } |
sub trans
{ my ($self, $trans) = @_;
if ($trans) {
unless ($trans->isa("Bio::EnsEMBL::Transcript")){
$self->throw("Input isn't a Bio::EnsEMBL::Transcript, it is a $trans\n$@");
}
$self->{'_trans'} = $trans;
}
return $self->{'_trans'}; } |
sub write_pfam_ids
{ my ($self)=@_;
my $dir = $self->workdir;
my $domains = $self->domains;
print STDERR "writing pfam ids to $dir/pfamA\n";
open (IDS,">$dir/pfamA") or $self->throw("Cannot open file for writing pfam ids $dir/pfamA\n");
print IDS "$domains";
close IDS;
return 1; } |
General documentation