Bio::EnsEMBL::Analysis::Runnable
AlignmentChains
Toolbar
Summary
Bio::EnsEMBL::Pipeline::Runnable::AlignmentChains
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Description
Methods
axtChain | No description | Code |
faToNib | No description | Code |
features | No description | Code |
lavToAxt | No description | Code |
min_chain_score | No description | Code |
new | No description | Code |
parse_Chain_file | No description | Code |
query_nib_dir | No description | Code |
query_slice | No description | Code |
run | Description | Code |
target_nib_dir | No description | Code |
target_slices | No description | Code |
write_lav | No description | Code |
Methods description
Title : run Usage : $self->run() Function: Returns : none Args : |
Methods code
sub axtChain
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_axtChain'} = $arg;
}
return $self->{'_axtChain'};
}
1; } |
sub faToNib
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_faToNib'} = $arg;
}
return $self->{'_faToNib'}; } |
sub features
{ my ($self, $features) = @_;
if (defined $features) {
$self->{_features} = $features;
}
return $self->{_features}; } |
sub lavToAxt
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_lavToAxt'} = $arg;
}
return $self->{'_lavToAxt'}; } |
sub min_chain_score
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_min_chain_score} = $val;
}
if (not exists $self->{_min_chain_score}) {
return undef;
} else {
return $self->{_min_chain_score};
}
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($features,
$query_slice,
$query_nib_dir,
$target_slices,
$target_nib_dir,
$min_chain_score,
$fa_to_nib,
$lav_to_axt,
$axt_chain,
) = rearrange([qw(FEATURES
QUERY_SLICE
QUERY_NIB_DIR
TARGET_SLICES
TARGET_NIB_DIR
MIN_CHAIN_SCORE
FATONIB
LAVTOAXT
AXTCHAIN
)],
@args);
throw("You must supply a reference to an array of features with -features\n")
if not defined $features;
throw("You must supply a query sequence\n")
if not defined $query_slice;
throw("You must supply a hash ref of target sequences with -target_slices")
if not defined $target_slices;
$self->faToNib($fa_to_nib) if defined $fa_to_nib;
$self->lavToAxt($lav_to_axt) if defined $lav_to_axt;
$self->axtChain($axt_chain) if defined $axt_chain;
$self->query_nib_dir($query_nib_dir) if defined $query_nib_dir;
$self->target_nib_dir($target_nib_dir) if defined $target_nib_dir;
$self->query_slice($query_slice);
$self->target_slices($target_slices);
$self->min_chain_score($min_chain_score) if defined $min_chain_score;
$self->features($features);
return $self; } |
sub parse_Chain_file
{ my ($self, $fh) = @_;
my @chains;
while(<$fh>) {
/^chain\s+(\S.+)$/ and do {
my @data = split /\s+/, $1;
my $chain = {
q_id => $data[1],
q_len => $data[2],
q_strand => $data[3] eq "-" ? -1 : 1,
t_id => $data[6],
t_len => $data[7],
t_strand => $data[8] eq "-" ? -1 : 1,
score => $data[0],
blocks => [],
};
my ($current_q_start, $current_t_start) = ($data[4] + 1, $data[9] + 1);
my @blocks = ([]);
while(<$fh>) {
if (/^(\d+)(\s+\d+\s+\d+)?$/) {
my ($ungapped, $rest) = ($1, $2);
my ($current_q_end, $current_t_end) =
($current_q_start + $ungapped - 1, $current_t_start + $ungapped - 1);
push @{$blocks[-1]}, { q_start => $current_q_start,
q_end => $current_q_end,
t_start => $current_t_start,
t_end => $current_t_end,
};
if ($rest and $rest =~ /\s+(\d+)\s+(\d+)/) {
my ($gap_q, $gap_t) = ($1, $2);
$current_q_start = $current_q_end + $gap_q + 1;
$current_t_start = $current_t_end + $gap_t + 1;
if ($gap_q != 0 and $gap_t !=0) {
push @blocks, [];
}
} else {
last;
}
}
else {
throw("Not expecting line '$_' in chain file");
}
}
foreach my $block (@blocks) {
my @ug_feats;
foreach my $ug_feat (@$block) {
if ($chain->{q_strand} < 0) {
my ($rev_q_start, $rev_q_end) = ($ug_feat->{q_start}, $ug_feat->{q_end});
$ug_feat->{q_start} = $chain->{q_len} - $rev_q_end + 1;
$ug_feat->{q_end} = $chain->{q_len} - $rev_q_start + 1;
}
if ($chain->{t_strand} < 0) {
my ($rev_t_start, $rev_t_end) = ($ug_feat->{t_start}, $ug_feat->{t_end});
$ug_feat->{t_start} = $chain->{t_len} - $rev_t_end + 1;
$ug_feat->{t_end} = $chain->{t_len} - $rev_t_start + 1;
}
my $fp = new Bio::EnsEMBL::FeaturePair->new();
$fp->seqname($chain->{q_id});
$fp->start($ug_feat->{q_start});
$fp->end($ug_feat->{q_end});
$fp->strand($chain->{q_strand});
$fp->hseqname($chain->{t_id});
$fp->hstart($ug_feat->{t_start});
$fp->hend($ug_feat->{t_end});
$fp->hstrand($chain->{t_strand});
$fp->score($chain->{score});
push @ug_feats, $fp;
}
my $dalf = new Bio::EnsEMBL::DnaDnaAlignFeature(-features =>\@ ug_feats);
$dalf->level_id(1);
push @{$chain->{blocks}}, $dalf;
}
push @chains, $chain->{blocks};
}
}
return\@ chains;
}
} |
sub query_nib_dir
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_query_nib_dir} = $val;
}
return $self->{_query_nib_dir}; } |
sub query_slice
{ my ($self, $slice) = @_;
if (defined $slice) {
$self->{_query_slice} = $slice;
}
return $self->{_query_slice}; } |
sub run
{ my ($self) = @_;
my $query_name = $self->query_slice->seq_region_name;
my $work_dir = $self->workdir . "/$query_name.$$.AxtChain";
my $lav_file = "$work_dir/$query_name.lav";
my $axt_file = "$work_dir/$query_name.axt";
my $chain_file = "$work_dir/$query_name.chain";
my (@nib_files, $query_nib_dir, $target_nib_dir);
mkdir $work_dir;
my $fh;
if ($self->query_nib_dir) {
if (not -d $self->query_nib_dir) {
throw("Could not fine query nib file directory:" . $self->query_nib_dir);
} else {
$query_nib_dir = $self->query_nib_dir;
}
} else {
$query_nib_dir = "$work_dir/query_nib";
mkdir $query_nib_dir;
my $seqio = Bio::SeqIO->new(-format => 'fasta',
-file => ">$query_nib_dir/$query_name.fa");
$seqio->write_seq($self->query_slice);
$seqio->close;
system($self->faToNib, "$query_nib_dir/$query_name.fa", "$query_nib_dir/$query_name.nib")
and throw("Could not convert fasta file $query_nib_dir/$query_name.fa to nib");
unlink "$query_nib_dir/$query_name.fa";
push @nib_files, "$query_nib_dir/$query_name.nib";
}
if ($self->target_nib_dir) {
if (not -d $self->target_nib_dir) {
throw("Could not fine target nib file directory:" . $self->target_nib_dir);
} else {
$target_nib_dir = $self->target_nib_dir;
}
} else {
$target_nib_dir = "$work_dir/target_nib";
mkdir $target_nib_dir;
foreach my $nm (keys %{$self->target_slices}) {
my $target = $self->target_slices->{$nm};
my $target_name = $target->seq_region_name;
my $seqio = Bio::SeqIO->new(-format => 'fasta',
-file => ">$target_nib_dir/$target_name.fa");
$seqio->write_seq($target);
$seqio->close;
system($self->faToNib, "$target_nib_dir/$target_name.fa", "$target_nib_dir/$target_name.nib")
and throw("Could not convert fasta file $target_nib_dir/$target_name.fa to nib");
unlink "$target_nib_dir/$target_name.fa";
push @nib_files, "$target_nib_dir/$target_name.nib";
}
}
open $fh, ">$lav_file" or
throw("could not open lav file '$lav_file' for writing\n");
$self->write_lav($fh);
close($fh);
system($self->lavToAxt, $lav_file, $query_nib_dir, $target_nib_dir, $axt_file)
and throw("Could not convert $lav_file to Axt format");
unlink $lav_file;
my $min_parameter = "-minScore=";
if (defined $self->min_chain_score) {
$min_parameter .= $self->min_chain_score;
} else {
$min_parameter .= 1000;
}
system($self->axtChain, $min_parameter, $axt_file, $query_nib_dir, $target_nib_dir, $chain_file)
and throw("Something went wrong with axtChain\n");
unlink $axt_file;
open $fh, $chain_file or throw("Could not open chainfile '$chain_file' for reading\n");
my $chains = $self->parse_Chain_file($fh);
close($fh);
$self->output($chains);
unlink $chain_file, @nib_files;
rmdir $query_nib_dir if not $self->query_nib_dir;
rmdir $target_nib_dir if not $self->target_nib_dir;
rmdir $work_dir;
return 1;
}
} |
sub target_nib_dir
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_target_nib_dir} = $val;
}
return $self->{_target_nib_dir}; } |
sub target_slices
{ my ($self, $hash_ref) = @_;
if (defined $hash_ref) {
$self->{_target_slices_hashref} = $hash_ref;
}
return $self->{_target_slices_hashref}; } |
sub write_lav
{ my ($self, $fh) = @_;
my (%features);
foreach my $feat (sort {$a->start <=> $b->start} @{$self->features}) {
my $strand = $feat->strand;
my $hstrand = $feat->hstrand;
if ($strand == -1) {
$strand *= -1;
$hstrand *= -1;
}
push @{$features{$feat->hseqname}{$strand}{$hstrand}}, $feat;
}
my $query_length = $self->query_slice->length;
my $query_name = $self->query_slice->seq_region_name;
foreach my $target (sort keys %features) {
print $fh "#:lav\n";
print $fh "d {\n\" generated by Runnable/AxtFilter.pm\"\n}\n";
foreach my $qstrand (keys %{$features{$target}}) {
foreach my $tstrand (keys %{$features{$target}{$qstrand}}) {
my $query_strand = ($qstrand == 1) ? 0 : 1;
my $target_strand = ($tstrand == 1) ? 0 : 1;
my $target_length = $self->target_slices->{$target}->length;
print $fh "#:lav\n";
print $fh "s {\n";
print $fh "\" $query_name\" 1 $query_length $query_strand 1\n";
print $fh "\" $target\" 1 $target_length $target_strand 1\n";
print $fh "}\n";
print $fh "h {\n";
print $fh "\" >$query_name";
if ($query_strand) {
print $fh " (reverse complement)";
}
print $fh "\"\n\" >$target";
if ($target_strand) {
print $fh " (reverse complement)";
}
print $fh "\"\n}\n";
foreach my $reg (@{$features{$target}{$qstrand}{$tstrand}}) {
my $qstart = $query_strand ? $query_length - $reg->end + 1 : $reg->start;
my $qend = $query_strand ? $query_length - $reg->start + 1 : $reg->end;
my $tstart = $target_strand ? $target_length - $reg->hend + 1 : $reg->hstart;
my $tend = $target_strand ? $target_length - $reg->hstart + 1 : $reg->hend;
my $score = defined($reg->score) ? $reg->score : 100;
my $percent_id = defined($reg->percent_id) ? $reg->percent_id : 100;
printf $fh "a {\n s %d\n", $score;
print $fh " b $qstart $tstart\n";
print $fh " e $qend $tend\n";
my @ug_feats = $reg->ungapped_features;
if ($qstrand == -1) {
@ug_feats = sort { $b->start <=> $a->start } @ug_feats;
} else {
@ug_feats = sort { $a->start <=> $b->start } @ug_feats;
}
foreach my $seg (@ug_feats) {
my $qstartl = $query_strand ? $query_length - $seg->end + 1 : $seg->start;
my $qendl = $query_strand ? $query_length - $seg->start + 1 : $seg->end;
my $tstartl = $target_strand ? $target_length - $seg->hend + 1 : $seg->hstart;
my $tendl = $target_strand ? $target_length - $seg->hstart + 1 : $seg->hend;
printf $fh " l $qstartl $tstartl $qendl $tendl %d\n", $percent_id;
}
print $fh "}\n";
}
print $fh "x {\n n 0\n}\n";
}
}
print $fh "m {\n n 0\n}\n#:eof\n";
}
}
} |
General documentation
Describe contact details here
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _