给你改了,速度大概提升了一倍吧,我想不出其他提升的方法了。我做的主要改变是使用了 @views 宏避免了一部分内存分配,又使用了一个 BitVector 来避免 push 操作。至于多线程和分布式计算,我只会用 Threads.@threads 宏,具体用法看文档,这里我觉得提升不大。另外建议读一下文档的性能建议(觉得自己写的代码慢的时候,我也常常会去读)。
"""
    test3(position=position, pos=pos)

Return the rows of `position` that lie closer than the cutoff `rp = 3.0825` to at
least one row of `pos`, measured with the minimum-image convention of a periodic box.

The box edge lengths are the per-column extents (`maximum - minimum`) of the first
three columns of `position`. Both arguments are indexed as matrices whose first three
columns are coordinates; when omitted they fall back to the globals `position` and
`pos`.

A column with zero extent makes the wrapped displacement `NaN`, so no row is selected
in that case.
"""
function test3(position=position, pos=pos)
    N = size(position, 1)
    # With no rows there is nothing to select (and no extent to compute).
    N == 0 && return position[1:0, :]

    rp = 3.0825   # configured equilibrium distance, used as the cutoff radius
    rp2 = rp^2    # compare squared distances to avoid a sqrt per pair

    # Periodic box edge lengths along x, y, z.
    cells = ntuple(3) do k
        lo, hi = extrema(view(position, :, k))
        Float64(hi - lo)
    end

    marks = falses(N)
    for i in 1:N
        # Iterate over the rows of `pos` itself, which may differ in count from N.
        for j in axes(pos, 1)
            r2 = 0.0
            for k in 1:3
                raw = position[i, k] - pos[j, k]
                # Minimum-image wrap: fold the displacement into [-cell/2, cell/2).
                wrapped = raw - floor(raw / cells[k] + 0.5) * cells[k]
                r2 += wrapped^2
            end
            if r2 < rp2
                marks[i] = true
                break   # one neighbour within the cutoff is enough
            end
        end
    end
    return position[marks, :]
end
加入多线程
"""
    test4(position=position, pos=pos)

Multithreaded selection of the rows of `position` that lie closer than the cutoff
`rp = 3.0825` to at least one row of `pos`, measured with the minimum-image
convention of a periodic box.

The box edge lengths are the per-column extents (`maximum - minimum`) of the first
three columns of `position`. The outer loop over rows of `position` is split across
threads with `Threads.@threads`. When omitted, the arguments fall back to the globals
`position` and `pos`.

A column with zero extent makes the wrapped displacement `NaN`, so no row is selected
in that case.
"""
function test4(position=position, pos=pos)
    N = size(position, 1)
    # With no rows there is nothing to select (and no extent to compute).
    N == 0 && return position[1:0, :]

    rp = 3.0825   # configured equilibrium distance, used as the cutoff radius
    rp2 = rp^2    # compare squared distances to avoid a sqrt per pair

    # Periodic box edge lengths along x, y, z.
    cells = ntuple(3) do k
        lo, hi = extrema(view(position, :, k))
        Float64(hi - lo)
    end

    # Vector{Bool} rather than BitVector: a BitVector packs 64 flags into one word,
    # so concurrent writes to different indices can overwrite each other.
    marks = fill(false, N)
    Threads.@threads for i in 1:N
        # All per-pair state is scalar and local to the iteration, so no scratch
        # buffer is shared between threads.
        for j in axes(pos, 1)
            r2 = 0.0
            for k in 1:3
                raw = position[i, k] - pos[j, k]
                # Minimum-image wrap: fold the displacement into [-cell/2, cell/2).
                wrapped = raw - floor(raw / cells[k] + 0.5) * cells[k]
                r2 += wrapped^2
            end
            if r2 < rp2
                marks[i] = true
                break   # one neighbour within the cutoff is enough
            end
        end
    end
    return position[marks, :]
end
放一下benchmark.
using BenchmarkTools
你原始代码的benchmark
julia>@benchmark test(position::Array{Float64,2}, pos::Array{Float64,2})::Array{Float64,2}
BenchmarkTools.Trial:
memory estimate: 303.19 MiB
allocs estimate: 2838215
--------------
minimum time: 89.933 ms (5.71% GC)
median time: 91.152 ms (6.75% GC)
mean time: 91.693 ms (6.70% GC)
maximum time: 100.230 ms (6.65% GC)
--------------
samples: 55
evals/sample: 1
提速后的 benchmark.
julia> @benchmark test3()
BenchmarkTools.Trial:
memory estimate: 101.05 MiB
allocs estimate: 946017
--------------
minimum time: 41.323 ms (5.88% GC)
median time: 42.436 ms (5.74% GC)
mean time: 42.685 ms (6.50% GC)
maximum time: 50.683 ms (4.46% GC)
--------------
samples: 118
evals/sample: 1
julia> @benchmark test4()
BenchmarkTools.Trial:
memory estimate: 101.24 MiB
allocs estimate: 947789
--------------
minimum time: 9.178 ms (0.00% GC)
median time: 10.537 ms (0.00% GC)
mean time: 35.980 ms (68.08% GC)
maximum time: 228.600 ms (93.30% GC)
--------------
samples: 139
evals/sample: 1
最后,新年快乐!