各位大佬好:
不重要的背景:
我希望利用Julia的BioStructure包来实现对已经叠合好的结构中模板(template)与诱饵(decoy)结构中匹配序列的抓取,目前的实现思路是根据CA原子的距离来确定模板中与诱饵相匹配的元素,并进一步获取元素的序列;
在此过程中,需要考虑到匹配到的元素与诱饵结构得残基数不等的情况,因此,我希望通过以下代码来构造几个不同的“起始点和终止点”进行rmsd的计算,选取最接近的一段序列;
出问题的代码
在脚本中,我预先获得了template_N和template_C这两个变量,随后利用这两个变量和预先获取的诱饵结构长度变量生成了一组用于定义序列区间的变量,随后我希望在循环比较rmsd的过程中使用这些变量,于是使用了new_var = eval(Symbol(“template_N_”*“$N”))的方法,结果报错UndefVarError(:new_var):
下面是出问题的区域:
template_N_1 = template_N
template_N_2 = template_C - decoy_motif_length + 1
out_seq_N = template_N
best_rmsd_val = 1000
for N = 1:2
template_N = eval(Symbol("template_N_"*"$N"))
目前可以确定问题出在 template_N = eval(Symbol(“template_N_”*“$N”))这一句,请问各位能否帮忙赶一下问题可能出在哪里?感激不尽~
下面是整个脚本的完整代码:
#!/bin/julia
using BioStructures
function get_decoy_motif(decoy_structure,decoy_motif_region,decoy_chain_id,template,out_fasta,motif_N,motif_C,error)
try
template_structure= read(template, PDB)
res_id = 0
template_N = 0
template_C = 0
distance_N = 0
distance_C = 0
for model in template_structure
for chain in model
for res in chain
res_id == 0 && (template_N = resnumber(res))
res_id == 0 && (distance_N = distance(decoy_structure[decoy_chain_id][motif_N], res, calphaselector))
res_id == 0 && (template_C = resnumber(res))
res_id == 0 && (distance_C = distance(decoy_structure[decoy_chain_id][motif_C], res, calphaselector))
res_id+=1
if distance(decoy_structure[decoy_chain_id][motif_N], res) <= distance_N
template_N = resnumber(res)
distance_N = distance(decoy_structure[decoy_chain_id][motif_N], res)
end
if distance(decoy_structure[decoy_chain_id][motif_C], res) <= distance_C
template_C = resnumber(res)
distance_C = distance(decoy_structure[decoy_chain_id][motif_C], res)
template_chain_id = chainid(chain)
end
end
end
end
decoy_motif_length = motif_C - motif_N + 1
template_N_1 = template_N
template_N_2 = template_C - decoy_motif_length + 1
template_N_3 = template_N + 1
template_N_4 = template_N - 1
template_C_1 = template_C
template_C_2 = template_N + decoy_motif_length - 1
template_C_3 = template_C + 1
template_C_4 = template_C - 1
out_seq_N = template_N
out_seq_C = template_C
best_rmsd_val = 1000
for N = 1:4
template_N = eval(Symbol("template_N_"*"$N"))
for C = 1:4
template_C = eval(Symbol("template_C_"*"$C"))
template_motif_length = template_C - template_N + 1
if template_motif_length == decoy_motif_length
motif_region = collectresidues(template_structure[template_chain_id], res -> template_N <= resnumber(res) <= template_C )
rmsd_val = rmsd(motif_region, decoy_motif_region, superimpose=false)
if rmsd_val <= best_rmsd_val
best_rmsd_val = rmsd_val
out_seq_N = template_N
out_seq_C = template_C
end
end
end
end
motif_region = collectresidues(template_structure, res -> out_seq_N <= resnumber(res) <= out_seq_C )
fasta_seq = LongAminoAcidSeq(motif_region)
write(out_fasta,">$template:$fasta_seq \n")
catch e1
write(error,"Error: $template.\n")
write(error,"$e1\n\n")
end
end
function main()
decoy_pdb="decoy.pdb"
template_library="Template_lib"
out_fasta="Candidates.fasta"
decoy_structure = read(decoy_pdb, PDB)
io = open(out_fasta, "w+")
error = open("ex_seq.error", "w+")
res_id = 0
motif_N = 0
motif_C = 0
decoy_chain_id = 0
for model in decoy_structure
for chain in model
decoy_chain_id = chainid(chain)
for res in chain
res_id == 0 && (motif_N = resnumber(res))
res_id == 0 && (motif_C = resnumber(res))
res_id+=1
resnumber(res) < motif_N && (motif_N = resnumber(res))
resnumber(res) > motif_C && (motif_C = resnumber(res))
end
end
end
decoy_motif_region = collectresidues(decoy_structure, res -> motif_N <= resnumber(res) <= motif_C)
Threads.@threads for template in readdir(template_library)
get_decoy_motif(decoy_structure,decoy_motif_region,decoy_chain_id,"$template_library/$template",io,motif_N,motif_C,error)
end
end
main()