刚从Matlab转过来不久,发现Julia做数值计算真的很快,但是发现了点问题,这是一个格子玻尔兹曼模拟的大部分代码,当mainFlow(iterStepMax)里面的最大迭代次数iterStepMax增加的时候,内存分配次数越来越多,我不知道这是否意味着性能下降,或者会不会出现计算机内存不够的情况,因为有时候迭代次数达到几万后数据就会显示为NaN。查了很多资料,包括性能建议里面的输出预分配https://docs.juliacn.com/latest/manual/performance-tips/,感觉不得其法,做了一些很小的优化,速度提升了,但是性能分析的内存分配仍然会增大,任务管理器中的内存占用却一直是稳定的,现在针对这个代码有点无从下手的感觉。恳请好心人帮忙分析下,内存分配的问题需不需要解决,如何应对。第一次发帖,有什么不清楚的请原谅,代码如下
function mainFlow(iterStepMax)
Q = 9 #D2Q9
nx = 1200 #lattice number in horizontal direction
ny = 600 #lattice number in vertical direction
U = 0.1 #inlet velocity
cx = [1.0 0.0 -1.0 0.0 1.0 -1.0 -1.0 1.0 0.0] #velocity components in horizontal direction
cy = [0.0 1.0 0.0 -1.0 1.0 1.0 -1.0 -1.0 0.0] #velocity components in vertical direction
w = [1.0 / 9.0 1.0 / 9.0 1.0 / 9.0 1.0 / 9.0 1.0 / 36.0 1.0 / 36.0 1.0 / 36.0 1.0 / 36.0 4.0 / 9.0] #weight parameters
ρ = ones(Float64, nx, ny) #initial density
u = zeros(Float64, nx, ny) #horizontal velocity component
v = zeros(Float64, nx, ny) #vertical velocity component
u0 = zeros(Float64, nx, ny) #horizontal velocity component before iteration
v0 = zeros(Float64, nx, ny) #vertical velocity component befre iteration
uv = zeros(Float64, nx, ny) #resultant velocity
f = zeros(Float64, nx, ny, Q) #distribution function
feq = zeros(Float64, nx, ny, Q) #equilibrium distribution function
dx = 1.0 #horizontal lattice length
dy = 1.0 #vertical lattice length
lx = dx * nx #horizontal domain length
ly = dy * ny #vertical domain length
dt = dx #dt = 1.0
c = dx / dt #lattice sound speed
Re = 1000.0 #Reynoldz number
ν = U * ly / Re #kinematic viscousity
τ = 3.0ν + 0.5 #relaxation time
ω = 1.0 / τ #relaxation frequency
println("Re = ", Re, ", τ = ", τ) #instead of "Re = $Re, τ = $τ"
ε = 1.0e-6 #convergence
@views u[1, 2:ny-1] .= U
for iterStep = 1:iterStepMax
collision(nx, ny, Q, u, v, ρ, cx, cy, w, ω, f, feq)
streaming(Q, cx, cy, f)
boundaryCondition(nx, ny, u, v, ρ, U, f)
macroQuantity(nx, ny, Q, u, v, uv, u0, v0, ρ, cx, cy, f)
temp1 = sum((u[:, :] - u0[:, :]) .^ 2 + (v[:, :] - v0[:, :]) .^ 2)
temp2 = sum(u .^ 2 + v .^ 2)
err = sqrt(temp1) / sqrt(temp2 + 1e-30)
if err <= ε
break
end
end
end
function collision(nx, ny, Q, u, v, ρ, cx, cy, w, ω, f, feq)
for j = 1:ny, i = 1:nx
t1 = u[i, j] * u[i, j] + v[i, j] * v[i, j]
for k = 1:Q
t2 = u[i, j] * cx[k] + v[i, j] * cy[k]
feq[i, j, k] = ρ[i, j] * w[k] * (1.0 + 3.0t2 + 4.5 * t2^2 - 1.5t1)
f[i, j, k] = (1.0 - ω) * f[i, j, k] + ω * feq[i, j, k]
end
end
end
function streaming(Q, cx, cy, f)
for k = 1:Q
@views f[:, :, k] .= circshift(f[:, :, k], [cx[k], cy[k]])
end
end
function boundaryCondition(nx, ny, u, v, ρ, U, f)
#left
@views f[1, 2:ny-1, 1] = f[1, 2:ny-1, 3] + ρ[1, 2:ny-1] .* U * 2.0 / 3.0
@views f[1, 2:ny-1, 5] = f[1, 2:ny-1, 7] - 0.5 .* (f[1, 2:ny-1, 2] - f[1, 2:ny-1, 4]) + ρ[1, 2:ny-1] .* U / 6.0
@views f[1, 2:ny-1, 8] = f[1, 2:ny-1, 6] + 0.5 .* (f[1, 2:ny-1, 2] - f[1, 2:ny-1, 4]) + ρ[1, 2:ny-1] .* U / 6.0
@views u[1, 2:ny-1] .= U
@views v[1, 2:ny-1] .= 0.0
#right
@views f[nx, :, 3] = f[nx-1, :, 3]
@views f[nx, :, 6] = f[nx-1, :, 6]
@views f[nx, :, 7] = f[nx-1, :, 7]
#bottom
@views f[:, 1, 2] = f[:, 1, 4]
@views f[:, 1, 5] = f[:, 1, 7]
@views f[:, 1, 6] = f[:, 1, 8]
@views u[:, 1] .= 0.0
@views v[:, 1] .= 0.0
#top
@views f[:, ny, 4] = f[:, ny, 2]
@views f[:, ny, 8] = f[:, ny, 6]
@views f[:, ny, 7] = f[:, ny, 5]
@views u[:, ny] .= 0.0
@views v[:, ny] .= 0.0
end
function macroQuantity(nx, ny, Q, u, v, uv, u0, v0, ρ, cx, cy, f)
ρ[:, :] = sum(f, dims = 3)
u0[:, :] = u[:, :]
v0[:, :] = v[:, :]
for j = 1:ny, i = 1:nx
uSum = 0.0
vSum = 0.0
for k = 1:Q
uSum += f[i, j, k] * cx[k]
vSum += f[i, j, k] * cy[k]
end
u[i, j] = uSum / ρ[i, j]
v[i, j] = vSum / ρ[i, j]
end
uv[:, :] = @. sqrt(u[:, :]^2 + v[:, :]^2)
end
性能工具测试效果如下
@benchmark mainFlow(50)
BenchmarkTools.Trial:
memory estimate: 7.38 GiB
allocs estimate: 10801
--------------
minimum time: 6.675 s (10.88% GC)
median time: 6.675 s (10.88% GC)
mean time: 6.675 s (10.88% GC)
maximum time: 6.675 s (10.88% GC)
--------------
samples: 1
evals/sample: 1
增大迭代次数
@benchmark mainFlow(100)
BenchmarkTools.Trial:
memory estimate: 14.63 GiB
allocs estimate: 21551
--------------
minimum time: 12.903 s (10.27% GC)
median time: 12.903 s (10.27% GC)
mean time: 12.903 s (10.27% GC)
maximum time: 12.903 s (10.27% GC)
--------------
samples: 1
evals/sample: 1
版本信息
Win10,64位,Julia 1.3.0,编辑器用的ATOM。
如果有其他能够改善计算性能的建议,提前感谢!