# 关于并行计算的Bug?

``````# bug.jl

# 一个简单的计算任务
a = rand(size)
sum(a)
end

# 管理不同worker上的任务，并给出进度
# 总共的任务数量
# 获取远程进程数量
numof_workers = length(workers())
result = []
workersFuture = [Future(1) for i in 1:numof_workers]
# 记录完成任务ID在列表中的位置
c = Channel{Int64}(numof_workers)

progress = 0.0

# 同时启动numof_workers数量的进程
for i in 1:numof_workers
end

# i记录了下次数据在result中存放的位置
# (i - 1)即为已经记录的数据数量
# 并且(i - 1 + numof_workers)为已经启动的进程数
i = 1
test_num = 0
while  true

# 完成任务的进程序号
j = take!(c)
# 取出结果
push!(result, fetch(workersFuture[j]))
workersFuture[j] = Future(1)
println("up \$i #")

# test_num = test_num + 1
# println("test_num \$test_num")
## 进度条
if (i - 1)/num_of_task > 0.001 + progress
print("\r")
print(round(progress*100, digits = 1))
print("%")
end
## 进度条
else
break
end
i = i + 1
end

close(c)
print("  all done @")
println(now())
result
end
``````

``````using Distributed
using Dates
@everywhere include("./bug.jl")
my_tasks = [10^8 for i in 1:10];

``````

``````            # test_num = test_num + 1
# println("test_num \$test_num")
``````

bug好像就没了？？

• 卡在最后
• 正常结束

``````@async begin
put!(c, j)
end
``````

``````ERROR: BoundsError: attempt to access 10-element Vector{Int64} at index [11]
``````

`numof_workers`大于1的时候，`i + numof_workers`确实可能大于`num_of_task`

## 更改后的代码

``````# bug.jl

# 一个简单的计算任务
a = rand(size)
sum(a)
end

# 管理不同worker上的任务，并给出进度
# 总共的任务数量
# 获取远程进程数量
numof_workers = length(workers())
println(numof_workers)
result = []
workersFuture = [Future(1) for i in 1:numof_workers]
# 记录完成任务ID在列表中的位置
c = Channel{Int64}(numof_workers)

progress = 0.0

# 同时启动numof_workers数量的进程
for i in 1:numof_workers
# println("first")
end

# i记录了下次数据在result中存放的位置
# (i - 1)即为已经记录的数据数量
# 并且(i - 1 + numof_workers)为已经启动的进程数
i = 1
# while (i - 1 + numof_workers) < num_of_task
test_num = 0
while  true

# yield()
# 完成任务的进程序号
j = take!(c)
# 取出结果
push!(result, fetch(workersFuture[j]))
# println(result[i])
workersFuture[j] = Future(1)
println("up \$i #")
# println(workers()[j])
# println((i - 1 + numof_workers) < num_of_task)
# print(i + numof_workers)
# print(", j \$j \n")

if (i - 1 + numof_workers) < num_of_task
@async begin; println(i); put!(workersFuture[j], remotecall_fetch(cal_one_task, workers()[j], task[i + numof_workers])); put!(c, j); end
test_num = test_num + 1
# println(i)
# println("test_num \$test_num")
## 进度条
# if (i - 1)/num_of_task > 0.001 + progress
#     print("\r")
#     print(round(progress*100, digits = 1))
#     print("%")
# end
## 进度条
nothing
else
break
end
# println(length(result))
i = i + 1

end

close(c)
print("  all done @")
println(now())
result
end
``````

``````@async begin; println(i); put!(workersFuture[j], remotecall_fetch(cal_one_task, workers()[j], task[i + numof_workers])); put!(c, j); end
``````

## 运行

``````using Distributed
using Dates
@everywhere include("./bug.jl")
my_tasks = [10^8 for i in 1:10];
``````

## 输出

``````10
1
up 1 #
1
up 2 #
2
···
``````

• 如果在worker上出错，main process在wait，是不会显示错误信息的；

• 就源代码的话，Julia中的Channel本身就自带了blocking的，所以从理论上来说，完全不需要自己单独写一个`i`的循环然后跳出，何况还是类似于两个Channel一起使用。所以可能的话，考虑使用一个Channel，然后循环并加上`@async`。可以参考一下文档中的例子。

• 如果Channel是空的，`take!`的时候还是会继续等，等到有数据为止。

``````the_task = task[i + numof_workers]
the_id = workers()[j]
the_j = j