最近在优化一个问题, 今天看到 https://www.cs.purdue.edu/homes/hnassar/JPUG/performance.html , 跑了一下里面的例子, 结果发现一个出乎意外的事情, 就是两个方法生成的@code_native
都一样, 居然性能差别挺大, 来论坛求助一下。
(base) ➜ mcts git:(master) ✗ julia
_
_ _ _(_)_ | Documentation: https://docs.julialang.org
(_) | (_) (_) |
_ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help.
| | | | | | |/ _` | |
| | |_| | | | (_| | | Version 1.6.0 (2021-03-24)
_/ |\__'_|_|_|\__'_| | Official https://julialang.org/ release
|__/ |
julia> include("src/test-perf.jl")
volume(c1) == volume(c2) == volume(c3) = true
17.612 ns (1 allocation: 16 bytes)
6.010 ns (1 allocation: 16 bytes)
12.974 ns (1 allocation: 16 bytes)
1.7160000000000002
julia> @code_typed volume(c1)
CodeInfo(
1 ─ %1 = Base.getfield(c, :length)::Any
│ %2 = Base.getfield(c, :width)::Any
│ %3 = Base.getfield(c, :height)::Any
│ %4 = (%1 * %2 * %3)::Any
└── return %4
) => Any
julia> @code_typed volume(c2)
CodeInfo(
1 ─ %1 = Base.getfield(c, :length)::Float64
│ %2 = Base.getfield(c, :width)::Float64
│ %3 = Base.getfield(c, :height)::Float64
│ %4 = Base.mul_float(%1, %2)::Float64
│ %5 = Base.mul_float(%4, %3)::Float64
└── return %5
) => Float64
julia> @code_typed volume(c3)
CodeInfo(
1 ─ %1 = Base.getfield(c, :length)::Float64
│ %2 = Base.getfield(c, :width)::Float64
│ %3 = Base.getfield(c, :height)::Float64
│ %4 = Base.mul_float(%1, %2)::Float64
│ %5 = Base.mul_float(%4, %3)::Float64
└── return %5
) => Float64
julia> @code_native volume(c1)
.text
; ┌ @ test-perf.jl:6 within `volume'
pushq %rbp
movq %rsp, %rbp
pushq %rbx
andq $-32, %rsp
subq $96, %rsp
vxorps %xmm0, %xmm0, %xmm0
vmovaps %ymm0, (%rsp)
movq $0, 32(%rsp)
movq %rsi, 64(%rsp)
movq %fs:0, %rbx
movq $12, (%rsp)
movq -32768(%rbx), %rax
movq %rax, 8(%rsp)
movq %rsp, %rax
movq %rax, -32768(%rbx)
movq (%rsi), %rax
; │┌ @ Base.jl:33 within `getproperty'
movq (%rax), %rcx
movq 8(%rax), %rdx
movq 16(%rax), %rax
movq %rcx, 32(%rsp)
movq %rdx, 24(%rsp)
movq %rax, 16(%rsp)
; │└
movq %rcx, 40(%rsp)
movq %rdx, 48(%rsp)
movq %rax, 56(%rsp)
movabsq $jl_apply_generic, %rax
movabsq $jl_system_image_data, %rdi
leaq 40(%rsp), %rsi
movl $3, %edx
vzeroupper
callq *%rax
movq 8(%rsp), %rcx
movq %rcx, -32768(%rbx)
leaq -8(%rbp), %rsp
popq %rbx
popq %rbp
retq
; └
julia> @code_native volume(c2)
.text
; ┌ @ test-perf.jl:13 within `volume'
; │┌ @ Base.jl:33 within `getproperty'
vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
; │└
; │┌ @ operators.jl:560 within `*' @ float.jl:332
vmulsd 8(%rdi), %xmm0, %xmm0
vmulsd 16(%rdi), %xmm0, %xmm0
; │└
retq
nop
; └
julia> @code_native volume(c3)
.text
; ┌ @ test-perf.jl:20 within `volume'
; │┌ @ Base.jl:33 within `getproperty'
vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
; │└
; │┌ @ operators.jl:560 within `*' @ float.jl:332
vmulsd 8(%rdi), %xmm0, %xmm0
vmulsd 16(%rdi), %xmm0, %xmm0
; │└
retq
nop
; └
julia>
test-perf.jl
代码如下:
mutable struct Cube
length
width
height
end
volume(c::Cube) = c.length*c.width*c.height
mutable struct Cube_typed
length::Float64
width::Float64
height::Float64
end
volume(c::Cube_typed) = c.length*c.width*c.height
mutable struct Cube_parametric_typed{T <: Real}
length::T
width::T
height::T
end
volume(c::Cube_parametric_typed) = c.length*c.width*c.height
c1 = Cube(1.1,1.2,1.3)
c2 = Cube_typed(1.1,1.2,1.3)
c3 = Cube_parametric_typed(1.1,1.2,1.3)
@show volume(c1) == volume(c2) == volume(c3)
using BenchmarkTools
@btime volume(c1) # not typed
@btime volume(c2) # typed float
@btime volume(c3) # typed parametric
我感觉@btime volume(c2) # typed float
和@btime volume(c3) # typed parametric
应该一样的啊, 结果差挺多的。 然后我想说用@code_native
看看到底有啥不一样 ,结果发现完全一样的。。。。。