This SO answer should be informative, but I would like to avoid using BioPython to limit dependencies.
Also, while we're doing this, see about refactoring the lines below so that they use a single re.finditer expression instead of two:
|
for x in fpri: # x is the key of the dict fpri, x is the sequence |
|
ma = [m.start() for m in re.finditer(x, se)] |
|
if len(ma) > 0: |
|
for st in ma: |
|
tempseq = se[st:] |
|
for y in rpri: |
|
rma = [m.start() for m in re.finditer(y, tempseq)] |
|
if len(rma) > 0: |
|
# build an array based on x's forward primer cluster ID, [c001, c002, ...] |
|
fpri_li = fpri[x].split(",") |
|
for i, ff in enumerate(fpri_li): |
|
f = ff.split(".")[-2] |
|
fpri_li[i] = f |
|
#print(fpri_li) |
|
|
|
# build an array based on y's reverse primer cluster ID, [c001, c002, ...] |
|
rpri_li = rpri[y].split(",") |
|
for j, rr in enumerate(rpri_li): |
|
r = rr.split(".")[-2] |
|
rpri_li[j] = r |
|
#print(rpri_li) |
An example from the ISqPCR code I wrote for another app:
import re
forward_primer = replace_ambiguous_bases(forward_primer)
reverse_primer = reverse_complement(replace_ambiguous_bases(reverse_primer))
primer_pattern = re.compile('({}).*({})'.format(forward_primer, reverse_primer))
# for match in [match for match in re.finditer(primer_pattern, target_sequence)]:
for match in re.finditer(primer_pattern, target_sequence):
product = target_sequence[match.start():match.end()]
return '{}\t{}\t{}\t{}\t{}\t{}\n'.format(primer_name, target_name, match.start(), match.end(),
match.end() - match.start(), product)
This SO answer should be informative, but I would like to avoid using BioPython to limit dependencies.
Also, while we're doing this, see about refactoring the lines below so that they use a single
re.finditer expression instead of two: MetaFunPrimer/src/get_pcr_product.py
Lines 66 to 86 in f1f6438
An example from the ISqPCR code I wrote for another app: