一个算rpa的程序,因为不是很懂计算机的东西,第一次写并行计算只是照着网上的简单的加了两个宏,发现这个程序运行出的结果里使用的线程数越多,输出的值越小,虽然区别不是太大,但是这个误差是存在的是每个循环分别在SharedArray里写东西的时候阻塞写不上了吗?请问该怎么解决呢?
using Plots
using Base.Threads
using Distributed
#using LinearAlgebra
using DelimitedFiles
if nworkers()==1
addprocs(8)
end
@everywhere using SharedArrays
@everywhere using LinearAlgebra
#费米分布函数(T固定为15K)
@everywhere function Nf(energy::Float64)
beta=50
1/(exp(beta*energy)+1)
end
@everywhere function dNf(energy::Float64)
beta=50
-beta*exp(beta*energy)/(exp(beta*energy)+1)^2
end
@everywhere function htb(kx::Float64,ky::Float64,kz::Float64)#给出tb哈密顿量(矩阵)
#参数部分
tx11 = -0.3870
txxy11 = 0
txy11 = 0
txx11 = -0.034
tz11 = -0.8591
txz11 = 0.0107
txyz11 = 0.025
tzz11 = 0.0904
tx22 = 0.3202
txy22 = -0.0467
txx22 = 0.0367
tz22 = 0.3216
txz22 = -0.1438
txyz22 = 0.0496
tzz22 = -0.0327
txxz22 = -0.0209
txxy22 = -0.0198
txxyz22 = 0.0164
txxx22 = 0.012
txy12 = 0.0798
txyz12 = -0.0669
txyzz12 = 0.0094
tx33 = -0.3761
txy33 = 0.0844
txx33 = -0.0414
txyy33 = -0.0043
txxyy33 = 0.0030
tz33 = -0.0368
txz33 = -0.0019
txyz33 = 0.0117
tzz33 = 0.0080
txxy13 = 0.0219
txxy23 = -0.0139
ep1=8.9506
ep2 = 9.0277
ep3 = 6.8979
mu = 6.5814
#矩阵元部分
htb=Array{Float64,2}(undef,3,3)
htb[1,1] = ep1 - mu + 2*tx11 * (cos(kx) + cos(ky)) + 4*txy11*cos(kx)*cos(ky) + 2*txx11*(cos(2*kx) + cos(2*ky)) + 4*txxy11*(cos(2*kx)*cos(2*ky) + cos(kx)*cos(2*ky)) + 2*tz11*cos(kz) + 2*tzz11*cos(2*kz) + 4*txz11*cos(kz)*(cos(kx) + cos(ky)) + 8*txyz11*cos(kx)*cos(ky)*cos(kz)
htb[2,2] = ep2 - mu + 2*tx22*(cos(kx) + cos(ky)) + 4*txy22*cos(kx)*cos(ky) + 2*txx22*(cos(2*kx) + cos(2*ky)) + 4*txxy22*(cos(2*kx)*cos(ky) + cos(kx)*cos(2*ky)) + 2*tz22*cos(kz) + 2*tzz22*cos(2*kz) + 4*txz22*cos(kz)*(cos(kx) + cos(ky)) + 8*txyz22*cos(kx)*cos(ky)*cos(kz) + 4*txxz22*cos(kz)*(cos(2*kx) + cos(2*ky)) + 8*txxyz22*cos(kz)*(cos(2*kx)*cos(ky) + cos(kx)*cos(2*ky)) + 2*txxx22*(cos(3*kx) + cos(3*ky))
htb[3,3] = ep3 - mu + 2*tx33*(cos(kx) + cos(ky)) + 4*txy33*cos(kx)*cos(ky) + 2*txx33*( cos(2*kx) + cos(2*ky)) + 4*txyy33*(cos(2*kx)*cos(ky) + cos(kx)*cos(2*ky)) + 4*txxyy33*(cos(2*kx)*cos(2*ky)) + 2*tz33*cos(kz) + 2*tzz33*cos(2*kz) + 4*txz33*cos(kz)*(cos(kx) + cos(ky)) + 8*txyz33*cos(kx)*cos(ky)*cos(kz)
htb[1,2] = -4*txy12*sin(kx)*sin(ky) - 8*txyz12*sin(kx)*sin(ky)*cos(kz) - 8*txyzz12*sin(kx)*sin(ky)*cos(2*kz)
htb[1,3] = 8*txxy13*cos(kz/2)*(cos(3*kx/2)*cos(ky/2) - cos(kx/2)*cos(3*ky/2))
htb[2,3] = -8*txxy23*cos(kz/2)*(sin(3*kx/2)*sin(ky/2) - sin(kx/2)*sin(3*ky/2))
htb[2,1] = htb[1,2]
htb[3,1] = htb[1,3]
htb[3,2] = htb[2,3]
htb
#[LinearAlgebra.eigvals(htb),LinearAlgebra.eigvecs(htb)]
end
#band给出能量 overlap给出本征态,这个band和overlap是对应的。
@everywhere function band(kx::Float64,ky::Float64,kz::Float64)
eigvals(htb(kx,ky,kz))
end
@everywhere function overlap(kx::Float64,ky::Float64,kz::Float64)
eigvecs(htb(kx,ky,kz))
end
@everywhere function l23(i::Int)
if i==5
3
elseif i<3
1
else 2
end
end
@everywhere function l14(i::Int)
if i==5
3
elseif i%2==1
1
else
2
end
end
function χbare(qx::Float64,qy::Float64,qz::Float64,ω::Float64,L)#算spin的sus
bzone=SharedArray(zeros(Float64,(2L)^3,3))
for i in -L+1:L
for j in -L+1:L
for k in -L+1:L
bzone[(i-1+L)*(2L)^2+(j-1+L)*(2L)+k+L,:]=[i*pi/L,j*pi/L,k*pi/L]
end
end
end
#println(bzone[8L^3,:])
ω=0.0
#算χ0
χ0=SharedArray(zeros(Float64,5,5))
sm=0.0
@sync @distributed for m in 1:(2L)^3 #这个循环好大,不知道咋优化,重点debug
#subchi=SharedArray(zeros(Float64,5,5) ##subchi 在特定kx,ky,kz点上的chi值 4 4矩阵
kx,ky,kz=bzone[m,:][1],bzone[m,:][2],bzone[m,:][3]
a=overlap(kx,ky,kz)
b=overlap(kx+qx,ky+qy,kz+qz)
e1=band(kx,ky,kz)
e2=band(kx+qx,ky+qy,kz+qz)
for i in 1:5
for j in 1:5 ## i,j orbital index
subsubchi=0.0 ## subsubchi是在ij点上的 一次运算中的chi 之间用subchi【ij】用一个数就行了 0
#tbk=htb(kx,ky,kz)
#tbkq=htb(kx+qx,ky+qy,kz+qz)
#a=tbk[2]
#b=tbkq[2]
#e1=tbk[1]
#e2=tbkq[2]
for p in 1:3
for q in 1:3
if abs(e1[p]-e2[q])>0.0001
subsubchi+=a[l14(i),p]*conj(a[l23(i),p])*b[l14(j),q]*conj(b[l23(j),q])*(Nf(e2[q])-Nf(e1[p]))/(e2[q]-e1[p])# 这两个差的小要 对配分函数要求个偏导
else
subsubchi+=a[l14(i),p]*conj(a[l23(i),p])*b[l14(j),q]*conj(b[l23(j),q])*dNf(e1[p])*(e2[q]-e1[q])
end
end
end
χ0[i,j]+=subsubchi ##这里也是不需要用i,j用数就行了
end
end
#println(m)
end
χ0=-χ0/(2L)^3
end
χbare(1.2,1.3,2.6,0.0,20)
# code