None available.
sub filter_inconsistent_chains
{ my ($input_chains, $overlap_chain_thresh) = @_;
my (@sorted_chains, @to_ignore_chains, @kept_chains);
foreach my $bl_list (@$input_chains) {
my $qga_l = $bl_list->[0]->reference_genomic_align;
my $qga_r = $bl_list->[-1]->reference_genomic_align;
my ($tga_l) = @{$bl_list->[0]->get_all_non_reference_genomic_aligns};
my ($tga_r) = @{$bl_list->[-1]->get_all_non_reference_genomic_aligns};
push @sorted_chains, {
blocks => $bl_list,
score => $bl_list->[0]->score,
qid => $qga_l->dnafrag->name,
qstart => $qga_l->dnafrag_start,
qend => $qga_r->dnafrag_end,
tid => $tga_l->dnafrag->name,
tstart => $tga_l->dnafrag_strand > 0 ? $tga_l->dnafrag_start : $tga_r->dnafrag_start,
tend => $tga_l->dnafrag_strand > 0 ? $tga_r->dnafrag_end : $tga_l->dnafrag_end,
tstrand => $tga_l->dnafrag_strand,
};
}
for(my $i=0; $i < @sorted_chains; $i++) {
next if $to_ignore_chains[$i];
my (@ov_chains, @block_ov_regs);
my $c = $sorted_chains[$i];
my @these_blocks = @{$c->{blocks}};
foreach my $kc (@kept_chains) {
if ($c->{qstart} <= $kc->{qend} and
$c->{qend} >= $kc->{qstart}) {
push @ov_chains, $kc;
}
}
for (my $i=0; $i < @these_blocks; $i++) {
my $b = $these_blocks[$i];
my $qga = $b->reference_genomic_align;
$block_ov_regs[$i] = [];
KCHAIN: foreach my $kc (@ov_chains) {
foreach my $kb (@{$kc->{blocks}}) {
my $k_qga = $kb->reference_genomic_align;
if ($qga->dnafrag_start <= $k_qga->dnafrag_end and
$qga->dnafrag_end >= $k_qga->dnafrag_start) {
my $ov_start = $k_qga->dnafrag_start;
$ov_start = $qga->dnafrag_start
if $qga->dnafrag_start > $ov_start;
my $ov_end = $k_qga->dnafrag_end;
$ov_end = $qga->dnafrag_end
if $qga->dnafrag_end < $ov_end;
push @{$block_ov_regs[$i]}, {
start => $ov_start,
end => $ov_end,
};
}
}
}
}
my $rejected = 0;
if (not grep { scalar(@{$_}) > 0 } @block_ov_regs) {
push @kept_chains, $c;
} else {
my $total_chain_len = 0;
for(my $i=0; $i < @these_blocks; $i++) {
my $b = $these_blocks[$i];
my $ga = $b->reference_genomic_align;
$total_chain_len += $ga->dnafrag_end - $ga->dnafrag_start + 1;
}
for (my $i=0; $i < @block_ov_regs; $i++) {
my @regs = @{$block_ov_regs[$i]};
my @new_regs;
foreach my $reg (sort { $a->{start} <=> $b->{start} } @regs) {
if (not @new_regs or $reg->{start} > $new_regs[-1]->{end}) {
push @new_regs, $reg;
} else {
if ($reg->{end} > $new_regs[-1]->{end}) {
$new_regs[-1]->{end} = $reg->{end};
}
}
}
$block_ov_regs[$i] =\@ new_regs;
}
my @contig_chain_parts;
my $last_was_non_ov = 0;
for(my $i=0; $i < @these_blocks; $i++) {
my $block = $these_blocks[$i];
my $qga = $block->reference_genomic_align;
my $ovs = $block_ov_regs[$i];
if (@$ovs) {
if ($ovs->[0]->{start} > $qga->dnafrag_start) {
if ($last_was_non_ov) {
push @{$contig_chain_parts[-1]}, {
index => $i,
start => $qga->dnafrag_start,
end => $ovs->[0]->{start} - 1,
};
} else {
push @contig_chain_parts, [{
index => $i,
start => $qga->dnafrag_start,
end => $ovs->[0]->{start} - 1,
}];
}
}
for(my $j=1; $j < @$ovs; $j++) {
push @contig_chain_parts, [{
index => $i,
start => $ovs->[$j-1]->{end} + 1,
end => $ovs->[$j]->{start} - 1,
}];
}
if ($ovs->[-1]->{end} < $qga->dnafrag_end) {
push @contig_chain_parts, [{
index => $i,
start => $ovs->[-1]->{end} + 1,
end => $qga->dnafrag_end,
}];
$last_was_non_ov = 1;
} else {
$last_was_non_ov = 0;
}
} else {
if ($last_was_non_ov) {
push @{$contig_chain_parts[-1]}, {
index => $i,
start => $qga->dnafrag_start,
end => $qga->dnafrag_end,
};
} else {
push @contig_chain_parts, [{
index => $i,
start => $qga->dnafrag_start,
end => $qga->dnafrag_end,
}];
}
$last_was_non_ov = 1;
}
}
my ($longest, $longest_len);
if (not @contig_chain_parts) {
$longest_len = 0;
} else {
foreach my $reg_list (@contig_chain_parts) {
my $len = 0;
foreach my $reg (@$reg_list) {
$len += $reg->{end} - $reg->{start} + 1;
}
if (not defined $longest or $len > $longest_len) {
$longest = $reg_list;
$longest_len = $len;
}
}
}
if ($longest_len / $total_chain_len > $overlap_chain_thresh) { my @bs; foreach my $el (@$longest) {
my $block = $these_blocks[$el->{index}];
my $st = $el->{start};
my $en = $el->{end};
my $qga = $block->reference_genomic_align;
if ($st != $qga->dnafrag_start or
$en != $qga->dnafrag_end) {
my $new_block = $block->restrict_between_reference_positions($st, $en);
$new_block->score($block->score);
push @bs, $new_block;
} else {
push @bs, $block;
}
};
my $qga_l = $bs[0]->reference_genomic_align;
my $qga_r = $bs[-1]->reference_genomic_align;
my ($tga_l) = @{$bs[0]->get_all_non_reference_genomic_aligns};
my ($tga_r) = @{$bs[-1]->get_all_non_reference_genomic_aligns};
$c->{blocks} =\@ bs;
$c->{qstart} = $qga_l->dnafrag_start;
$c->{qend} = $qga_r->dnafrag_end;
$c->{tstart} = ($tga_l->dnafrag_strand > 0)
? $tga_l->dnafrag_start
: $tga_r->dnafrag_start;
$c->{tend} = ($tga_l->dnafrag_strand > 0)
? $tga_r->dnafrag_end
: $tga_l->dnafrag_end;
push @kept_chains, $c;
} else {
$rejected = 1;
}
}
if ($rejected) {
for(my $j=$i+1; $j < @sorted_chains; $j++) {
if ($sorted_chains[$j]->{tid} eq $c->{tid}) {
$to_ignore_chains[$j] = 1;
}
}
} else {
for(my $j=$i+1; $j < @sorted_chains; $j++) {
my $oc = $sorted_chains[$j];
my $consistent = 0;
if ($oc->{tid} eq $c->{tid} and
$oc->{tstrand} eq $c->{tstrand}) {
if ($oc->{qstart} > $c->{qend}) {
if ($oc->{tstrand} > 0 and $oc->{tstart} > $c->{tend} or
$oc->{tstrand} < 0 and $oc->{tend} < $c->{tstart}) {
$consistent = 1;
}
} elsif ($oc->{qend} < $c->{qstart}) {
if ($oc->{tstrand} > 0 and $c->{tstart} > $oc->{tend} or
$oc->{tstrand} < 0 and $c->{tend} < $oc->{tstart}) {
$consistent = 1;
}
}
}
if ($consistent) {
for(my $k=0; $k < @kept_chains; $k++) {
if ($oc->{qstart} <= $kept_chains[$k]->{qend} and
$oc->{qend} >= $kept_chains[$k]->{qstart}) {
$consistent = 0;
last;
}
}
if ($consistent) {
push @kept_chains, $oc;
$to_ignore_chains[$j] = 1;
}
}
}
}
}
return [map { $_->{blocks} } @kept_chains];
}
} |
sub stringify_chains
{ my ($chains) = @_;
my $str = "";
foreach my $c (@$chains) {
$str .= sprintf "CHAIN %d\n", $c->[0]->score;
foreach my $b (@$c) {
my $q = $b->reference_genomic_align;
my ($t) = @{$b->get_all_non_reference_genomic_aligns};
$str .= sprintf(" %s/%d-%d, %s/%d-%d %d (%d)\n",
$q->dnafrag->name,
$q->dnafrag_start,
$q->dnafrag_end,
$t->dnafrag->name,
$t->dnafrag_start,
$t->dnafrag_end,
$t->dnafrag_strand,
$t->level_id);
}
}
return $str;
}
1; } |