"""
    conver(y, string)

Inspect the two characters of `string` at positions `y + 1` and `y + 2`
(`y` is a 0-based offset into `string`).

# Returns
- `true` if the two characters are exactly `"CG"`.
- `false` if they are anything else that does not contain `'N'`.
- `missing` if either character is `'N'`.
- `nothing` if the two-character window does not lie entirely inside `string`
  (`y` is negative, or `string` is shorter than `y + 2`).
"""
function conver(y, string)
    # Guard both ends of the window. Without the lower bound, `y == -1` throws a
    # BoundsError and `y <= -2` slices an empty range that compares as "not CG".
    (y >= 0 && length(string) >= y + 2) || return nothing

    # Compare characters directly instead of allocating a 2-character substring.
    first_base = string[y + 1]
    second_base = string[y + 2]
    if first_base == 'N' || second_base == 'N'
        return missing
    end
    return first_base == 'C' && second_base == 'G'
end
"""
    classify_reads(index, match_read_cpg, starts_cpgs, starts_reads, seqs_reads, overlapcopy)

Classify the read identified by `index` from the `conver` calls at the positions
listed in column 2 of `match_read_cpg[index]`.

Each position is converted to an offset relative to `starts_reads[index]` and
passed to `conver` together with the read sequence `seqs_reads[index]`.

# Returns
- `missing` if fewer than four positions are listed, or fewer than four of them
  yield a usable (`true`/`false`) call.
- `false` if all usable calls agree (all `true` or all `false`).
- `true` if the usable calls are mixed.

# Side effects
Rows of `overlapcopy` whose `queryHits` equals `index` and whose `subjectHits`
is a position with an unusable call (`missing` or `nothing`) are removed with
`deleteat!`. No rows are removed when fewer than four positions are listed.
"""
function classify_reads(
    index, match_read_cpg, starts_cpgs, starts_reads, seqs_reads, overlapcopy
)
    covered_cpgs = match_read_cpg[index][:, 2]
    length(covered_cpgs) < 4 && return missing

    # Offsets of the covered positions relative to the start of this read.
    offsets = starts_cpgs[covered_cpgs] .- starts_reads[index]
    # Loop-invariant: convert the sequence once instead of once per position.
    sequence = String(seqs_reads[index])

    representation = Bool[]
    dropped = eltype(covered_cpgs)[]
    for i in eachindex(offsets, covered_cpgs)
        c = conver(offsets[i], sequence)
        if ismissing(c) || isnothing(c)
            push!(dropped, covered_cpgs[i])
        else
            push!(representation, c)
        end
    end

    # Remove every unusable (read, position) pair in a single pass over
    # `overlapcopy` rather than rescanning the whole table once per position.
    if !isempty(dropped)
        doomed = findall(
            (overlapcopy.queryHits .== index) .&
            in.(overlapcopy.subjectHits, Ref(dropped)),
        )
        deleteat!(overlapcopy, doomed)
    end

    length(representation) < 4 && return missing

    # Mixed calls: at least one `true` and at least one `false`.
    return any(representation) && !all(representation)
end
"""
    calculatestate(classified_reads, match_read_cpg, starts_cpgs, starts_reads, seqs_reads, overlapcopy)

Call `classify_reads` once for every element of `classified_reads`, in iteration
order, and return the results collected in a `Vector{Any}`.

All remaining arguments are forwarded unchanged to `classify_reads`.
"""
function calculatestate(
    classified_reads, match_read_cpg, starts_cpgs, starts_reads, seqs_reads, overlapcopy
)
    states = Any[]
    foreach(classified_reads) do read_index
        state = classify_reads(
            read_index, match_read_cpg, starts_cpgs, starts_reads, seqs_reads, overlapcopy
        )
        push!(states, state)
    end
    return states
end
我的 match_read_cpg(函数的第二个参数)是一个 GroupedDataFrame,每次循环都需要取出其中的一个 DataFrame 进行处理。calculatestate 函数里有第一层 for 循环,循环体中调用了 a=classify_reads(i,match_read_cpg,starts_cpgs,starts_reads,seqs_reads,overlapcopy),而 classify_reads 函数内部又有一层 for 循环。
其实我原本想在 classify_reads 函数里用广播来调用 conver,但无奈调用之后还要判断返回值,并据此删除 overlapcopy 里对应的行,所以只能写成循环,相当于嵌套了两层 for,这一步因此就慢下来了。各位大佬能帮忙看看怎么解决吗?另外,还有哪些地方可以修改以提高性能呢?