某个函数在运行后首次调用会花费大量时间、占用大量内存,第二次之后就能够如期快速执行,且若置于“true ? 1 : ”之后仍会如此,这是为什么?

我在我运行的程序中发现,某个函数包含多次循环,该函数第一次执行会花费大量时间并消耗大量内存,而第二次之后的执行都很迅速。(使用Python执行相同算法时不出现此问题。)

以下是两次执行@time报告:

START
  9.900494 seconds (18.60 M allocations: 1.008 GiB, 1.57% gc time)

  0.032444 seconds (35.94 k allocations: 1.553 MiB)
END

经过调查,函数花费的时间主要在被反复执行的以下语句:

check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)

如果我将这句改为

1

那么函数就可以很快运行,结果如下:

START   
  0.006238 seconds (9.00 k allocations: 892.812 KiB)

  0.000589 seconds (9.00 k allocations: 892.484 KiB)
END

但是如果改为:

true ? 1 : check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)

那么还是会执行很长时间,即使冒号后的部分肯定不会执行

START   
  6.606485 seconds (18.01 M allocations: 1007.268 MiB, 2.32% gc time)

  0.001203 seconds (9.24 k allocations: 896.281 KiB)
END

所以我就很想不清楚,为什么这一句在第一次调用函数中执行很长时间并花费大量内存?而且,即使在它存在但肯定被忽略的情况下,依旧会花费时间并占用内存?

我接触Julia并不多久,难免有些疏漏,感谢大家帮助。

完整代码
revm(x,y)=y-x
struct Ari
    o::Function # can be +,-,*,/,revm,\
    x::Union{Ari,Real}
    y::Union{Ari,Real}
end
AbstractAri = Union{Ari, Real}

function transfer(o::Function,x::AbstractAri,y::AbstractAri)
    o==revm ? (-,y,x) :
    o==(\) ? (/,y,x) :
    (o,x,y)
end

function result(ins::Ari)::Union{Real,Nothing}
    o, x, y=ins.o,Real(ins.x),Real(ins.y)
    (o==(/) && y==0 || o==(\) && x==0) ? nothing :
    (x==nothing || y==nothing) ? nothing :
    o(x::Real, y::Real)
end

Real(ins::Ari) = result(ins::Ari)

isHigherThan(self::AbstractAri, other::AbstractAri)=false
isHigherThan(self::Ari, other::Ari)::Bool=self.o in (*,/,\) && other.o in (+,-,revm)

isSameClass(self::AbstractAri, other::AbstractAri)=false
isSameClass(self::Ari, other::Ari) = (self.o in (*,/,\)) == (other.o in (*,/,\))

function Base.string(ins::Ari)::String
    o,x,y=transfer(ins.o,ins.x,ins.y)
    part1, part2 = string.((x,y))
    if isHigherThan(ins,x)
        part1 = "($part1)"
    end
    if isHigherThan(ins,y)
        part2 = "($part2)"
    elseif isSameClass(ins,y) && o in (-, /)
        part2 = "($part2)"
    end
    return "$(part1)$(o)$(part2)"
end

function Base.:(==)(self::Ari, other::Ari)::Bool
    o1, x1, y1 = transfer(self.o,self.x,self.y)
    o2, x2, y2 = transfer(other.o,other.x,other.y)
    o1 == o2 && ((x1==x2)&&(y1==y2)||(o1 in (+, *) && x1==y2 && x2==y1))
end

function mess(f::Function, a::Int, b::Int, c::Int, d::Int)::Nothing
    ct = [a,b,c,d]
    for z1 = 1:4, z2 = 1:3
        s = Array{Int8,1}([1,2,3,4])
        za = ct[s[z1]]
        deleteat!(s,z1)
        u = copy(s)
        zb = ct[u[z2]]
        deleteat!(u,z2)
        zc = ct[u[1]]
        zd = ct[u[2]]
        f(za::Int,zb::Int,zc::Int,zd::Int)
        f(za::Int,zb::Int,zc::Int,zd::Int)
    end
end

check(ins::Ari, results::Array{Ari,1}) = result(ins::Ari) == 24 && ins ∉ results ? push!(results, ins) : nothing; nothing

function solve(a::Real,b::Real,c::Real,d::Real)::Nothing
    results = Array{Ari,1}()
    os = (+,-,revm,*,/,\)
    for o1 in os, o2 in os, o3 in os
        mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
            check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)
        end
    end
    for o1 in (*,/,\), o2 in (*,/,\), o3 in (+,-,revm)
        mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
            check(Ari(o3, Ari(o1, za, zb), Ari(o2, zc, zd)), results)
        end
    end
    if length(results)>0
        for ins in results
            println("$(string(ins)) = $(result(ins))")
        end
    else
        println("No results.")
    end
end

println("START")
@time solve(4,4,4,4)
println()
@time solve(4,4,4,4)
println("END")

用这个

using BenchmarkTools
@btime your code

Julia 是即时编译的,一个函数执行第一次时会编译并缓存下来(python就直接丢了),因而第一次运行时间会异常地长。如果要判断实际资源消耗,像楼上说的用 BenchmarkTools 之类的包,可以获得不含编译的时间。

1赞

@btime 的结果如下:

START   
  26.164 ms (35894 allocations: 1.55 MiB)

  26.862 ms (35894 allocations: 1.55 MiB)
END

函数即时编译可以理解,但是不至于一行check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)要编译6~7秒吧。

for o1 in os, o2 in os, o3 in os
        mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
            check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)
        end
    end
    for o1 in (*,/,\), o2 in (*,/,\), o3 in (+,-,revm)
        mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
            check(Ari(o3, Ari(o1, za, zb), Ari(o2, zc, zd)), results)
        end
    end

修改成

mess(a, b, c, d) do za::Int, zb::Int, zc::Int, zd::Int
        for o1 in os, o2 in os, o3 in os
            check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)
        end
        for o1 in (*, /, \), o2 in (*, /, \), o3 in (+, -, revm)
            check(Ari(o3, Ari(o1, za, zb), Ari(o2, zc, zd)), results)
        end
    end

之后就解决了。虽然感觉很神奇,但是确实就执行很快。@time 的效果如下:

START   
  0.068882 seconds (33.30 k allocations: 1.048 MiB)

  0.043584 seconds (26.70 k allocations: 695.859 KiB)
END

可能是由于你这个代码是在全局环境下执行的,所以os的类型不稳定

+let os=os, a=a, b=b, c=c, d=d
for o1 in os, o2 in os, o3 in os
    mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
        check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)
    end
end

for o1 in (*,/,\), o2 in (*,/,\), o3 in (+,-,revm)
    mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
        check(Ari(o3, Ari(o1, za, zb), Ari(o2, zc, zd)), results)
    end
end
+end

Measure performance with @time and pay attention to memory allocation

然而按照你提供的解决方案,问题仍会存在:

START
 10.102811 seconds (18.75 M allocations: 1.016 GiB, 1.21% gc time)

  0.048649 seconds (43.97 k allocations: 1.795 MiB)
END

哦应该还需要把 results 添加进去到 let 的列表里,这里是通过let把全局变量转换为局部变量。

@time let os=os, a=a, b=b, c=c, d=d, results=results
for o1 in os, o2 in os, o3 in os
    mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
        check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)
    end
end

for o1 in (*,/,\), o2 in (*,/,\), o3 in (+,-,revm)
    mess(a,b,c,d) do za::Int,zb::Int,zc::Int,zd::Int
        check(Ari(o3, Ari(o1, za, zb), Ari(o2, zc, zd)), results)
    end
end
end

很不幸的是,我按照你提供的方案,问题仍会发生:

START
  9.876631 seconds (18.58 M allocations: 1.007 GiB, 1.27% gc time)

  0.029693 seconds (36.27 k allocations: 1.555 MiB)
END

我认为这个并不是代码作用域的问题。os、results等变量,在函数内都是局部变量。而且,即使类型不稳定,也不会导致

true ? 1 : check(Ari(o1, za, Ari(o2, zb, Ari(o3, zc, zd))), results)

仍会消耗大量时间的现象。

京ICP备17009874号-2