很简单的代码,为啥会有性能问题

各位好
很简单的代码,速度确很慢,我怀疑是结构中使用Ref的方法不对
大家帮忙看看,谢谢

mutable struct T3
  f1::Int64
  f2::String
  f3::Float64
  EvlP1::Ref{Float64}
  f5::Vector{Int64}
  function T3()
    this = new()
    this.EvlP1 = Ref{Float64}(3.0)
    return this
  end
end

function test() 
  t3=T3()
  #cc=0.1::Float64
  cc=Float64(0.1)
  for i = 1:10^8
    cc = cc +t3.EvlP1[]
  end
  println(cc)  
end

#@time test()
@code_warntype test()

下面是code_warntype出来的结果,有3个地方是红色的
Variables
#self#::Core.Compiler.Const(test, false)
t3::T3
cc::Any
@_4::Union{Nothing, Tuple{Int64,Int64}}
i::Int64

Body::Nothing
1 ── (t3 = Main.T3())
││ (cc = Main.Float64(0.1))
││ %3 = Core.apply_type(Base.Val, 8)::Core.Compiler.Const(Val{8}, false)
││ %4 = (%3)()::Core.Compiler.Const(Val{8}(), false)
││ %5 = Base.literal_pow(Main.:^, 10, %4)::Int64
││ %6 = (1:%5)::Core.Compiler.PartialStruct(UnitRange{Int64}, Any[Core.Compiler.Const(1, false), Int64])
││ (@_4 = Base.iterate(%6))
││ %8 = (@_4 === nothing)::Bool
││ %9 = Base.not_int(%8)::Bool
└└──── goto #4 if not %9
2 ┄┄ %11 = @_4::Tuple{Int64,Int64}::Tuple{Int64,Int64}
││ (i = Core.getfield(%11, 1))
││ %13 = Core.getfield(%11, 2)::Int64
││ %14 = cc::Any <--------------
││ %15 = Base.getproperty(t3, :EvlP1)::Ref{Float64} <--------------
││ %16 = Base.getindex(%15)::Any <--------------
││ (cc = %14 + %16)
││ (@_4 = Base.iterate(%6, %13))
││ %19 = (@_4 === nothing)::Bool
││ %20 = Base.not_int(%19)::Bool
└└──── goto #4 if not %20
3 ── goto #2
4 ┄┄ %23 = Main.println(cc)::Core.Compiler.Const(nothing, false)
└└──── return %23

有没有什么文章讲述怎样来排查类似这样的问题?

性能很差
3.660509 seconds (200.00 M allocations: 2.980 GiB, 5.72% gc time)

  1. 不要用 mutable
  2. 不清楚 Ref 这里为什么会类型不稳定,但是类型不稳定会严重影响速度
  3. for循环可以用 simd
julia> struct T3
         f1::Int64
         f2::String
         f3::Float64
         EvlP1::Float64
         f5::Vector{Int64}
       end

julia> T3() = T3(rand(Int), "", rand(Float64), rand(Float64), rand(Int, 2))
T3

julia> function test()
         t3=T3()
         cc = 0.1
         @simd for i = 1:10^8
           cc = cc + t3.EvlP1
         end
         cc
       end
test (generic function with 1 method)

julia> @btime test()
  5.332 ms (1 allocation: 96 bytes)

这些内容基本上都来源于 Performance Tips · The Julia Language

好的,非常感谢…