Julia version 1.4.0. The source code is as follows:
using POMDPs, POMDPModelTools, POMDPPolicies, BeliefUpdaters, POMDPSimulators, Random
using QMDP
using ParticleFilters
using BasicPOMCP
struct State
# Latent variables
desired_autonomy::Bool
# Observable variables
performance::Bool
given_autonomy::Bool
# last engagement, to be used for reward
engagement::Bool
end
# struct Act
# give_autonomy::Bool
# end
struct Obs
performance::Bool
given_autonomy::Bool
# Using duration (1 = engaged/'just right', 0 = too long / too short on task)
# as a proxy for engagement
duration::Bool # engagement
end
struct MOMDP <: POMDP{State, Symbol, Obs} #TODO mutable struct - ideally make p_ability change over time
# CPT: P(u' | u, p, gu)
# The state at the previous step (desired autonomy, performance, whether autonomy was given) influences the desired autonomy at the current step, i.e. the user's internal state
p_autonomy_when_desired_good_given::Float64
p_autonomy_when_desired_good_not_given::Float64
p_autonomy_when_desired_bad_given::Float64
p_autonomy_when_desired_bad_not_given::Float64
p_autonomy_when_not_desired_good_given::Float64
p_autonomy_when_not_desired_good_not_given::Float64
p_autonomy_when_not_desired_bad_given::Float64
p_autonomy_when_not_desired_bad_not_given::Float64
# CPT: P(i' | u', gu')
# The desired autonomy at the current step and whether autonomy is currently given influence engagement at the next step
p_engaged_when_desired_given::Float64
p_engaged_when_desired_not_given::Float64
p_engaged_when_not_desired_given::Float64
p_engaged_when_not_desired_not_given::Float64
# For now, ability is a probabilistic constant for a student that determines performance independent of attempt
# (i.e. ability is defined as a probability constant that does not change over time)
p_ability::Float64
# Reward for being engaged ("just right", vs. taking too long or too short on a task;
# using duration as a proxy for engagement)
r_engagement::Float64
discount::Float64 # discount factor
end
# Transition values from CPTs for default constructor
MOMDP() = MOMDP(0.9, 0.9, 0.3, 0.8, 0.8, 0.1, 0.01, 0.2,
0.9, 0.3, 0.2, 0.9,
0.5, # p_ability TODO: draw from distribution (first pass: tune manually to see diffs)
1.0, # r_engagement
0.95 # discount
)
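# To make sure I am reading the CPT parameters correctly (my own check, based on the struct above):
# the first constructor argument is P(desired_autonomy' = true | desired_autonomy = true, performance = good, given_autonomy = true),
# and the ninth is P(engaged' = true | desired_autonomy' = true, given_autonomy' = true).
@assert MOMDP().p_autonomy_when_desired_good_given == 0.9
@assert MOMDP().p_engaged_when_desired_given == 0.9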
POMDPs.discount(m::MOMDP) = m.discount
const num_states = 2*2*2*2
const num_actions = 2
const num_observations = 2*2*2
POMDPs.n_states(::MOMDP) = num_states
POMDPs.n_actions(::MOMDP) = num_actions
POMDPs.n_observations(::MOMDP) = num_observations
# States of MOMDP
const all_states = [State(desired_autonomy, performance, given_autonomy, engagement) for engagement = 0:1, given_autonomy = 0:1, performance = 0:1, desired_autonomy = 0:1] # the first iterator varies fastest, so this linear order matches stateindex below
POMDPs.states(m::MOMDP) = all_states
# println(all_states)
# println(all_states[4])
# println(all_states[12])
function POMDPs.stateindex(::MOMDP, s::State) # I am not sure what this index is actually used for
# TODO: use sub2ind for efficiency
return convert(Int64, s.desired_autonomy * 8 + s.performance * 4 + s.given_autonomy * 2 + s.engagement * 1 + 1)
end # Is convert here just casting the Bool arithmetic in the parentheses to an integer?
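# Quick sanity check of the index mapping (my own check): the Bool fields seem to be treated as 0/1,
# so the all-false state maps to index 1 and the all-true state to index 16, matching the order of all_states above.
@assert stateindex(MOMDP(), State(false, false, false, false)) == 1
@assert stateindex(MOMDP(), State(true, true, true, true)) == 16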
POMDPs.actions(m::MOMDP) = [:give_autonomy, :revoke_autonomy]
function POMDPs.actionindex(m::MOMDP, a::Symbol) # I am not sure yet what actionindex is used for
if a == :give_autonomy
return 1
elseif a == :revoke_autonomy
return 2
end
error("invalid MOMDP action: $a")
end
const all_observations = [Obs(performance, given_autonomy, duration) for performance = 0:1, given_autonomy = 0:1, duration = 0:1]
POMDPs.observations(m::MOMDP) = all_observations
# Observation is certain, i.e. we assume there is no observation uncertainty
# function observation_index()
# function observation(m::MOMDP, s::State)
# println("the 2-argument observation function was called")
# return SparseCat([Obs(s)], [1.0])
# end
# SparseCat(values, probabilities) builds a sparse categorical distribution (I don't fully understand it, but I think it either keeps the support of values small or simply constructs the observation distribution)
#
# function observation(m::MOMDP, s::State, a::Symbol, sp::State)
# return observation(m, a, sp)
# end
#
# function observation(m::MOMDP, a::Symbol, sp::State)
# return observation(m, sp)
# end
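# For reference, my current understanding (possibly wrong) is that a deterministic observation model
# would look something like the line below; I keep it commented out, like the versions above, so the
# script runs exactly as posted:
# POMDPs.observation(m::MOMDP, a::Symbol, sp::State) = SparseCat([Obs(sp.performance, sp.given_autonomy, sp.engagement)], [1.0])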
# Transition function P(s' | s, a)
function POMDPs.transition(m::MOMDP, s::State, a::Symbol)
sp_desired_autonomy = true
sp_engagement = true
sp_performance = true # both performance outcomes are enumerated below with probability p_ability / (1 - p_ability)
# Next latent state of desired autonomy P(u' | u, p, gu)
# If user wants autonomy
if s.desired_autonomy
# Does well
if s.performance
# And we give them autonomy
if a == :give_autonomy
# Then the prob for next desired_autonomy, and the given autonomy, updated in the state
p_sp_desired_autonomy = m.p_autonomy_when_desired_good_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_desired_good_not_given
sp_given_autonomy = false
end
else
if a == :give_autonomy
p_sp_desired_autonomy = m.p_autonomy_when_desired_bad_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_desired_bad_not_given
sp_given_autonomy = false
end
end
else # user does not want autonomy
if s.performance # does well
if a == :give_autonomy # give autonomy
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_good_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_good_not_given
sp_given_autonomy = false
end
else
if a == :give_autonomy
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_bad_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_bad_not_given
sp_given_autonomy = false
end
end
end
# Next engagement level P(i' | u', gu)
if sp_given_autonomy
p_sp_engagement_desired = m.p_engaged_when_desired_given
p_sp_engagement_not_desired = m.p_engaged_when_not_desired_given
else
p_sp_engagement_desired = m.p_engaged_when_desired_not_given
p_sp_engagement_not_desired = m.p_engaged_when_not_desired_not_given
end
# Let's say performance reflects a general ability that is constant throughout the curriculum for now
p_sp_performance = m.p_ability
sps = State[]
probs = Float64[]
push!(sps, State(sp_desired_autonomy, sp_performance, sp_given_autonomy, sp_engagement))
push!(probs, p_sp_desired_autonomy * p_sp_engagement_desired * p_sp_performance)
push!(sps, State(!sp_desired_autonomy, sp_performance, sp_given_autonomy, sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * p_sp_engagement_not_desired * p_sp_performance)
push!(sps, State(sp_desired_autonomy, sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, p_sp_desired_autonomy * (1.0 - p_sp_engagement_desired) * p_sp_performance)
push!(sps, State(!sp_desired_autonomy, sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * (1.0 - p_sp_engagement_not_desired) * p_sp_performance)
push!(sps, State(sp_desired_autonomy, !sp_performance, sp_given_autonomy, sp_engagement))
push!(probs,p_sp_desired_autonomy * p_sp_engagement_desired * (1.0 - p_sp_performance))
push!(sps, State(!sp_desired_autonomy, !sp_performance, sp_given_autonomy, sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * p_sp_engagement_not_desired * (1.0 - p_sp_performance))
push!(sps, State(sp_desired_autonomy, !sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, p_sp_desired_autonomy * (1.0 - p_sp_engagement_desired) * (1.0 - p_sp_performance))
push!(sps, State(!sp_desired_autonomy, !sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * (1.0 - p_sp_engagement_not_desired) * (1.0 - p_sp_performance))
# Debugging
# print("\n######\n")
# print(s, " desired_autonomy, performance, given_autonomy, engagement\n", a, "\n")
# print(sps, "\n")
# print(probs, "\n")
# print("\n######\n")
return SparseCat(sps, probs) # SparseCat(values, probabilities) creates a sparse categorical distribution
end
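# Sanity check (mine, not part of the original model): the eight successor-state probabilities
# returned by transition should sum to 1.
let d_check = transition(MOMDP(), State(true, true, true, true), :give_autonomy)
@assert sum(pdf(d_check, s) for s in all_states) ≈ 1.0
end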
# Rewarded for being engaged; improving engagement helps learning efficiency
function POMDPs.reward(m::MOMDP, s::State, a::Symbol)
return s.engagement ? m.r_engagement : 0.0 #TODO: try -1.0 here
end
# initial_state_distribution(m::MOMDP) = SparseCat(states(m), ones(num_states) / num_states)
p_initially_motivated = 0.5 # 0.5 is uniform prior
# State{desired_autonomy,performance,given_autonomy,engagement}
init_state_dist = SparseCat([State(true, false, false, false), State(false, false, false, false)], [p_initially_motivated, 1.0-p_initially_motivated])
POMDPs.initial_state_distribution(m::MOMDP) = init_state_dist
# Solver
momdp = MOMDP()
# QMDP
# solver = QMDPSolver(max_iterations=20, belres=10.0, verbose=true)
solver = QMDPSolver(max_iterations=100, belres=1e-3, verbose=false)
# The solve function is in vanilla.jl in the DiscreteValueIteration package
# You can add some println calls inside that solve function to print the steps you want to see more clearly
# I tried this out on the TigerPOMDP problem first
# I am still not clear on how the alpha vectors are solved for in this piano problem (note from 2020/1/6)
policy = solve(solver, momdp) # solve() -> ValueIterationPolicy()
# print(policy)
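# My understanding of the QMDP policy (may be wrong): it stores one alpha vector per action,
# the value of action a under belief b is dot(b, alpha_a), and the best action is the argmax;
# that is what the while loop in generate_next_action below computes by hand.
@assert length(policy.alphas) == num_actions # one alpha vector per action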
# Construct a sequential importance resampling (SIR) particle filter
# It wraps a basic particle filter that implements the POMDPs.jl updater interface,
# i.e. filter has the Updater type
filter = SIRParticleFilter(momdp, 10000) # I don't understand how this filter works, i.e. how it implements the updater interface
init_dist = initial_state_distribution(momdp)
init_belief = initialize_belief(filter, init_dist)
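# My (hedged) understanding of how the filter acts as an updater: an Updater only needs to support
# initialize_belief(updater, dist) and update(updater, belief, action, observation). The SIR particle
# filter provides both: update() simulates each particle forward through the transition model and
# reweights/resamples it by the likelihood of the observation, which is why it needs an observation
# model (or obs_weight) for the POMDP.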
# The input function returns true / false
# The argument can be "performance" or "engagement"
function input(ask::String="performance")::Bool
if ask == "performance"
prompt = "performed well? (y/n) "
else
prompt = "engaged? (y/n) "
end
print(prompt)
user_input = chomp(readline()) # chomp(s) removes a trailing newline from the string
if user_input == "n"
return false
else
return true
end
end
function unroll_particles(particles::ParticleFilters.ParticleCollection{State})
d = Float64[] # dense belief vector: the probability of each of the num_states states under the particle belief
for i = 1:num_states
push!(d, pdf(particles, all_states[i]))
end
return d
end
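# For example, with the initial belief above, unroll_particles should put roughly half the mass on
# index 1 (the all-false state) and roughly half on index 9 (desired_autonomy = true, everything else
# false), which is what the "Belief:" line in the output below shows.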
function generate_next_action(particle_belief::Any=init_belief, iteration::Int64=1)
println("Step: ", iteration)
belief = unroll_particles(particle_belief)
println("Belief: ", belief)
# println(policy.alphas)
Alpha = policy.alphas
value1 = 0
value2 = 0
i = 1
while i <= 16
value1 += belief[i]*Alpha[1][i]
value2 += belief[i]*Alpha[2][i]
i = i + 1
end
println("value of give_autonomy : ",value1)
println("value of revoke_autonomy : ",value2)
if value1 > value2
action = :give_autonomy
action_val = true
else
action = :revoke_autonomy
action_val = false
end
println("Next action is: ", action)
# Input user's performance and engagement
inputed_performance = input("performance")
inputed_engagement = input("engagement")
o = Obs(inputed_performance, action_val, inputed_engagement)
next_belief = update(filter, particle_belief, action, o)
return generate_next_action(next_belief, iteration + 1)
end
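# generate_next_action recurses indefinitely: it picks the higher-valued action under the current
# belief, asks for the user's performance/engagement, updates the particle belief with that
# observation, and moves to the next step. Start the interactive loop: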
generate_next_action()
The error is as follows:
WARNING: redefining constant all_states
WARNING: redefining constant all_observations
1
Belief: Any[0.4960999999999617, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5038999999999608, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
value of give_autonomy : 14.00374048624651
value of revoke_autonomy : 14.174663638724043
Next action is: revoke_autonomy
performed well? (y/n) y
engaged? (y/n) n
ERROR: LoadError: MethodError: Cannot `convert` an object of type Expr to an object of type Float64
Closest candidates are:
convert(::Type{T}, ::T) where T<:Number at number.jl:6
convert(::Type{T}, ::Number) where T<:Number at number.jl:7
convert(::Type{T}, ::Base.TwicePrecision) where T<:Number at twiceprecision.jl:250
...
Stacktrace:
[1] setindex!(::Array{Float64,1}, ::Expr, ::Int64) at .\array.jl:825
[2] reweight! at C:\Users\Administrator\.juliapro\JuliaPro_v1.4.0-1\packages\ParticleFilters\N26ol\src\pomdps.jl:10 [inlined]
[3] reweight! at C:\Users\Administrator\.juliapro\JuliaPro_v1.4.0-1\packages\ParticleFilters\N26ol\src\basic.jl:109 [inlined]
[4] update(::BasicParticleFilter{MOMDP,MOMDP,LowVarianceResampler,Random._GLOBAL_RNG,Array{State,1}}, ::ParticleCollection{State}, ::Symbol, ::Obs) at C:\Users\Administrator\.juliapro\JuliaPro_v1.4.0-1\packages\ParticleFilters\N26ol\src\basic.jl:52
[5] generate_next_action(::ParticleCollection{State}, ::Int64) at F:\bs\piano_momdp_solver-master\julia\1:310
[6] generate_next_action() at F:\bs\piano_momdp_solver-master\julia\1:279
[7] top-level scope at F:\bs\piano_momdp_solver-master\julia\1:314
in expression starting at F:\bs\piano_momdp_solver-master\julia\1:314
I searched around and it seems to be a data-type problem, but I've tried many changes and none of them work, so I'm asking the forum experts for help. (I'm an undergraduate who just started using Julia for my thesis, and there is a lot I don't understand yet; any guidance would be appreciated.)