Julia version 1.4.0. The source code is as follows:
using POMDPs, POMDPModelTools, POMDPPolicies, BeliefUpdaters, POMDPSimulators, Random
using QMDP
using ParticleFilters
using BasicPOMCP
struct State
# Latent variables
desired_autonomy::Bool
# Observable variables
performance::Bool
given_autonomy::Bool
# last engagement, to be used for reward
engagement::Bool
end
# struct Act
# give_autonomy::Bool
# end
struct Obs
performance::Bool
given_autonomy::Bool
# Using duration (1 = engaged/'just right', 0 = too long / too short on task)
# as a proxy for engagement
duration::Bool # engagement
end
struct MOMDP <: POMDP{State, Symbol, Obs} #TODO mutable struct - ideally make p_ability change over time
# CPT: P(u' | u, p, gu)
# The state at the previous step (desired autonomy, performance, whether autonomy was given) influences the desired autonomy at the current step, i.e. the user's internal state
p_autonomy_when_desired_good_given::Float64
p_autonomy_when_desired_good_not_given::Float64
p_autonomy_when_desired_bad_given::Float64
p_autonomy_when_desired_bad_not_given::Float64
p_autonomy_when_not_desired_good_given::Float64
p_autonomy_when_not_desired_good_not_given::Float64
p_autonomy_when_not_desired_bad_given::Float64
p_autonomy_when_not_desired_bad_not_given::Float64
# CPT: P(i' | u', gu')
# The desired autonomy at the current step and whether autonomy is currently given influence engagement at the next step
p_engaged_when_desired_given::Float64
p_engaged_when_desired_not_given::Float64
p_engaged_when_not_desired_given::Float64
p_engaged_when_not_desired_not_given::Float64
# For now, ability is a probabilistic constant for a student that determines performance independent of attempt
# (i.e. ability is defined as a probability constant that does not change over time)
p_ability::Float64
# Reward for being engaged ("just right", vs. taking too long or too short on a task;
# using duration as a proxy for engagement)
r_engagement::Float64
discount::Float64 # discount factor
end
# Transition values from CPTs for default constructor
MOMDP() = MOMDP(0.9, 0.9, 0.3, 0.8, 0.8, 0.1, 0.01, 0.2,
0.9, 0.3, 0.2, 0.9,
0.5, # p_ability TODO: draw from distribution (first pass: tune manually to see diffs)
1.0, # r_engagement
0.95 # discount
)
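# To make sure I am reading the CPT parameters correctly (my own check, based on the struct above):
# the first constructor argument is P(desired_autonomy' = true | desired_autonomy = true, performance = good, given_autonomy = true),
# and the ninth is P(engaged' = true | desired_autonomy' = true, given_autonomy' = true).
@assert MOMDP().p_autonomy_when_desired_good_given == 0.9
@assert MOMDP().p_engaged_when_desired_given == 0.9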
POMDPs.discount(m::MOMDP) = m.discount
const num_states = 2*2*2*2
const num_actions = 2
const num_observations = 2*2*2
POMDPs.n_states(::MOMDP) = num_states
POMDPs.n_actions(::MOMDP) = num_actions
POMDPs.n_observations(::MOMDP) = num_observations
# States of MOMDP
const all_states = [State(desired_autonomy, performance, given_autonomy, engagement) for engagement = 0:1, given_autonomy = 0:1, performance = 0:1, desired_autonomy = 0:1] # the first iterator varies fastest, so this linear order matches stateindex below
POMDPs.states(m::MOMDP) = all_states
# println(all_states)
# println(all_states[4])
# println(all_states[12])
function POMDPs.stateindex(::MOMDP, s::State) # I am not sure what this index is actually used for
# TODO: use sub2ind for efficiency
return convert(Int64, s.desired_autonomy * 8 + s.performance * 4 + s.given_autonomy * 2 + s.engagement * 1 + 1)
end # Is convert here just casting the Bool arithmetic in the parentheses to an integer?
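# Quick sanity check of the index mapping (my own check): the Bool fields seem to be treated as 0/1,
# so the all-false state maps to index 1 and the all-true state to index 16, matching the order of all_states above.
@assert stateindex(MOMDP(), State(false, false, false, false)) == 1
@assert stateindex(MOMDP(), State(true, true, true, true)) == 16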
POMDPs.actions(m::MOMDP) = [:give_autonomy, :revoke_autonomy]
function POMDPs.actionindex(m::MOMDP, a::Symbol) # I am not sure yet what actionindex is used for
if a == :give_autonomy
return 1
elseif a == :revoke_autonomy
return 2
end
error("invalid MOMDP action: $a")
end
const all_observations = [Obs(performance, given_autonomy, duration) for performance = 0:1, given_autonomy = 0:1, duration = 0:1]
POMDPs.observations(m::MOMDP) = all_observations
# Observation is certain, i.e. we assume there is no observation uncertainty
# function observation_index()
# function observation(m::MOMDP, s::State)
# println("the 2-argument observation function was called")
# return SparseCat([Obs(s)], [1.0])
# end
# SparseCat(values, probabilities) builds a sparse categorical distribution (I don't fully understand it, but I think it either keeps the support of values small or simply constructs the observation distribution)
#
# function observation(m::MOMDP, s::State, a::Symbol, sp::State)
# return observation(m, a, sp)
# end
#
# function observation(m::MOMDP, a::Symbol, sp::State)
# return observation(m, sp)
# end
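# For reference, my current understanding (possibly wrong) is that a deterministic observation model
# would look something like the line below; I keep it commented out, like the versions above, so the
# script runs exactly as posted:
# POMDPs.observation(m::MOMDP, a::Symbol, sp::State) = SparseCat([Obs(sp.performance, sp.given_autonomy, sp.engagement)], [1.0])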
# Transition function P(s' | s, a)
function POMDPs.transition(m::MOMDP, s::State, a::Symbol)
sp_desired_autonomy = true
sp_engagement = true
sp_performance = true # both performance outcomes are enumerated below with probability p_ability / (1 - p_ability)
# Next latent state of desired autonomy P(u' | u, p, gu)
# If user wants autonomy
if s.desired_autonomy
# Does well
if s.performance
# And we give them autonomy
if a == :give_autonomy
# Then the prob for next desired_autonomy, and the given autonomy, updated in the state
p_sp_desired_autonomy = m.p_autonomy_when_desired_good_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_desired_good_not_given
sp_given_autonomy = false
end
else
if a == :give_autonomy
p_sp_desired_autonomy = m.p_autonomy_when_desired_bad_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_desired_bad_not_given
sp_given_autonomy = false
end
end
else # user does not want autonomy
if s.performance # does well
if a == :give_autonomy # give autonomy
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_good_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_good_not_given
sp_given_autonomy = false
end
else
if a == :give_autonomy
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_bad_given
sp_given_autonomy = true
else
p_sp_desired_autonomy = m.p_autonomy_when_not_desired_bad_not_given
sp_given_autonomy = false
end
end
end
# Next engagement level P(i' | u', gu)
if sp_given_autonomy
p_sp_engagement_desired = m.p_engaged_when_desired_given
p_sp_engagement_not_desired = m.p_engaged_when_not_desired_given
else
p_sp_engagement_desired = m.p_engaged_when_desired_not_given
p_sp_engagement_not_desired = m.p_engaged_when_not_desired_not_given
end
# Let's say performance reflects a general ability that is constant throughout the curriculum for now
p_sp_performance = m.p_ability
sps = State[]
probs = Float64[]
push!(sps, State(sp_desired_autonomy, sp_performance, sp_given_autonomy, sp_engagement))
push!(probs, p_sp_desired_autonomy * p_sp_engagement_desired * p_sp_performance)
push!(sps, State(!sp_desired_autonomy, sp_performance, sp_given_autonomy, sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * p_sp_engagement_not_desired * p_sp_performance)
push!(sps, State(sp_desired_autonomy, sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, p_sp_desired_autonomy * (1.0 - p_sp_engagement_desired) * p_sp_performance)
push!(sps, State(!sp_desired_autonomy, sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * (1.0 - p_sp_engagement_not_desired) * p_sp_performance)
push!(sps, State(sp_desired_autonomy, !sp_performance, sp_given_autonomy, sp_engagement))
push!(probs,p_sp_desired_autonomy * p_sp_engagement_desired * (1.0 - p_sp_performance))
push!(sps, State(!sp_desired_autonomy, !sp_performance, sp_given_autonomy, sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * p_sp_engagement_not_desired * (1.0 - p_sp_performance))
push!(sps, State(sp_desired_autonomy, !sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, p_sp_desired_autonomy * (1.0 - p_sp_engagement_desired) * (1.0 - p_sp_performance))
push!(sps, State(!sp_desired_autonomy, !sp_performance, sp_given_autonomy, !sp_engagement))
push!(probs, (1.0 - p_sp_desired_autonomy) * (1.0 - p_sp_engagement_not_desired) * (1.0 - p_sp_performance))
# Debugging
# print("\n######\n")
# print(s, " desired_autonomy, performance, given_autonomy, engagement\n", a, "\n")
# print(sps, "\n")
# print(probs, "\n")
# print("\n######\n")
return SparseCat(sps, probs) # SparseCat(values, probabilities) creates a sparse categorical distribution
end
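# Sanity check (mine, not part of the original model): the eight successor-state probabilities
# returned by transition should sum to 1.
let d_check = transition(MOMDP(), State(true, true, true, true), :give_autonomy)
@assert sum(pdf(d_check, s) for s in all_states) ≈ 1.0
end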
# Rewarded for being engaged; improving engagement helps learning efficiency
function POMDPs.reward(m::MOMDP, s::State, a::Symbol)
return s.engagement ? m.r_engagement : 0.0 #TODO: try -1.0 here
end
# initial_state_distribution(m::MOMDP) = SparseCat(states(m), ones(num_states) / num_states)
p_initially_motivated = 0.5 # 0.5 is uniform prior
# State{desired_autonomy,performance,given_autonomy,engagement}
init_state_dist = SparseCat([State(true, false, false, false), State(false, false, false, false)], [p_initially_motivated, 1.0-p_initially_motivated])
POMDPs.initial_state_distribution(m::MOMDP) = init_state_dist
# Solver
momdp = MOMDP()
# QMDP
# solver = QMDPSolver(max_iterations=20, belres=10.0, verbose=true)
solver = QMDPSolver(max_iterations=100, belres=1e-3, verbose=false)
# The solve function is in vanilla.jl in the DiscreteValueIteration package
# You can add some println calls inside that solve function to print the steps you want to see more clearly
# I tried this out on the TigerPOMDP problem first
# I am still not clear on how the alpha vectors are solved for in this piano problem (note from 2020/1/6)
policy = solve(solver, momdp) # solve() -> ValueIterationPolicy()
# print(policy)
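# My understanding of the QMDP policy (may be wrong): it stores one alpha vector per action,
# the value of action a under belief b is dot(b, alpha_a), and the best action is the argmax;
# that is what the while loop in generate_next_action below computes by hand.
@assert length(policy.alphas) == num_actions # one alpha vector per action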
# Construct a sequential importance resampling (SIR) particle filter
# It wraps a basic particle filter that implements the POMDPs.jl updater interface,
# i.e. filter has the Updater type
filter = SIRParticleFilter(momdp, 10000) # I don't understand how this filter works, i.e. how it implements the updater interface
init_dist = initial_state_distribution(momdp)
init_belief = initialize_belief(filter, init_dist)
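# My (hedged) understanding of how the filter acts as an updater: an Updater only needs to support
# initialize_belief(updater, dist) and update(updater, belief, action, observation). The SIR particle
# filter provides both: update() simulates each particle forward through the transition model and
# reweights/resamples it by the likelihood of the observation, which is why it needs an observation
# model (or obs_weight) for the POMDP.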
# The input function returns true / false
# The argument can be "performance" or "engagement"
function input(ask::String="performance")::Bool
if ask == "performance"
prompt = "performed well? (y/n) "
else
prompt = "engaged? (y/n) "
end
print(prompt)
user_input = chomp(readline()) # chomp(s) removes a trailing newline from the string
if user_input == "n"
return false
else
return true
end
end
function unroll_particles(particles::ParticleFilters.ParticleCollection{State})
d = Float64[] # dense belief vector: the probability of each of the num_states states under the particle belief
for i = 1:num_states
push!(d, pdf(particles, all_states[i]))
end
return d
end
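# For example, with the initial belief above, unroll_particles should put roughly half the mass on
# index 1 (the all-false state) and roughly half on index 9 (desired_autonomy = true, everything else
# false), which is what the "Belief:" line in the output below shows.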
function generate_next_action(particle_belief::Any=init_belief, iteration::Int64=1)
println("Step: ", iteration)
belief = unroll_particles(particle_belief)
println("Belief: ", belief)
# println(policy.alphas)
Alpha = policy.alphas
value1 = 0
value2 = 0
i = 1
while i <= 16
value1 += belief[i]*Alpha[1][i]
value2 += belief[i]*Alpha[2][i]
i = i + 1
end
println("value of give_autonomy : ",value1)
println("value of revoke_autonomy : ",value2)
if value1 > value2
action = :give_autonomy
action_val = true
else
action = :revoke_autonomy
action_val = false
end
println("Next action is: ", action)
# Input user's performance and engagement
inputed_performance = input("performance")
inputed_engagement = input("engagement")
o = Obs(inputed_performance, action_val, inputed_engagement)
next_belief = update(filter, particle_belief, action, o)
return generate_next_action(next_belief, iteration + 1)
end
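# generate_next_action recurses indefinitely: it picks the higher-valued action under the current
# belief, asks for the user's performance/engagement, updates the particle belief with that
# observation, and moves to the next step. Start the interactive loop: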
generate_next_action()
The error is as follows:
WARNING: redefining constant all_states
WARNING: redefining constant all_observations
1
Belief: Any[0.4960999999999617, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5038999999999608, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
value of give_autonomy : 14.00374048624651
value of revoke_autonomy : 14.174663638724043
Next action is: revoke_autonomy
performed well? (y/n) y
engaged? (y/n) n
ERROR: LoadError: MethodError: Cannot `convert` an object of type Expr to an object of type Float64
Closest candidates are:
convert(::Type{T}, ::T) where T<:Number at number.jl:6
convert(::Type{T}, ::Number) where T<:Number at number.jl:7
convert(::Type{T}, ::Base.TwicePrecision) where T<:Number at twiceprecision.jl:250
...
Stacktrace:
[1] setindex!(::Array{Float64,1}, ::Expr, ::Int64) at .\array.jl:825
[2] reweight! at C:\Users\Administrator\.juliapro\JuliaPro_v1.4.0-1\packages\ParticleFilters\N26ol\src\pomdps.jl:10 [inlined]
[3] reweight! at C:\Users\Administrator\.juliapro\JuliaPro_v1.4.0-1\packages\ParticleFilters\N26ol\src\basic.jl:109 [inlined]
[4] update(::BasicParticleFilter{MOMDP,MOMDP,LowVarianceResampler,Random._GLOBAL_RNG,Array{State,1}}, ::ParticleCollection{State}, ::Symbol, ::Obs) at C:\Users\Administrator\.juliapro\JuliaPro_v1.4.0-1\packages\ParticleFilters\N26ol\src\basic.jl:52
[5] generate_next_action(::ParticleCollection{State}, ::Int64) at F:\bs\piano_momdp_solver-master\julia\1:310
[6] generate_next_action() at F:\bs\piano_momdp_solver-master\julia\1:279
[7] top-level scope at F:\bs\piano_momdp_solver-master\julia\1:314
in expression starting at F:\bs\piano_momdp_solver-master\julia\1:314
I searched around and it seems to be a data-type problem, but I've tried many changes and none of them work, so I'm asking the forum experts for help. (I'm an undergraduate who just started using Julia for my thesis, and there is a lot I don't understand yet; any guidance would be appreciated.)