How to constrain the weights of certain layers to be non-negative after building a network in Flux

I asked earlier about how to build a neural network with passthrough layers, and I'm very grateful to JohnnyChen from this community for answering.
It's a bit embarrassing to be asking again...
I tried to write my own update! function, but I really couldn't manage it... my fundamentals are weak and programming feels hard...

I also want mini-batches and to optimize with Adam, and those pieces all feel tangled together in ways I couldn't resolve, so in the end I deleted those parts. Let me share my program below.

The data itself doesn't really matter, so I won't post it; it can be generated randomly. (Feel free to skip the next paragraph...)

I really did try... but nothing I wrote would run, so I deleted it all and only the runnable parts remain. Could people maybe throw me some links to study, or show me a small example? I'm sharing the code here in case anyone wants to write something similar and can use it as a rough reference...

Please go easy on me... I'll keep working on this, and if I get it written I'll post it here too, even though it won't be great... most of it is pieced together from the model zoo. Writing code really does take accumulated knowledge, and I'll keep at it...

Sorry for rambling so much...

using Flux, Statistics, DelimitedFiles
using Flux: Params, gradient
using Flux.Optimise: update!
using Flux.Data: DataLoader
using Flux: throttle, @epochs
using Parameters: @with_kw
using BSON
using BSON: @save, @load

# Struct to define hyperparameters
@with_kw mutable struct Args
    η::Float64 = 3e-4       # learning rate
    β::Tuple = (0.9, 0.8)   # exponential decay rates for the first (β1) and second (β2) moment estimates
    λ = 0.01f0              # regularization parameter
    batchsize::Int = 100    # samples per parameter update; one iteration = one update computed on batchsize samples (here the train/dev/test loaders give 12/4/4 iterations)
    epochs::Int = 5         # one epoch = one pass over all training samples; e.g. 500 samples with batchsize = 10 gives 50 iterations per epoch
    savepath::String = "D:\\---MYFILE---\\--Nerual N--"
end

# function to load and process the data
function get_processed_data()
    # Loading Dataset
    cd("D:\\---MYFILE---\\--Nerual N--")
    rawdata = readdlm("rawdatapqv.txt")' # read data

    # split_ratio = args.split_ratio # For the train test split
    train_p = 0.6  # cumulative split point: first 60% for training
    dev_p = 0.8    # next 20% for dev
    test_p = 1.0   # last 20% for test

    n = 12
    x = rawdata[1:6n, :]    # input: three-phase power injections at the n nodes
    y = rawdata[6n+1:9n, :] # output: three-phase voltages at the n nodes

    # Split into train, dev and test sets
    train_index = floor(Int, size(x, 2) * train_p)
    dev_index = floor(Int, size(x, 2) * dev_p)
    test_index = floor(Int, size(x, 2) * test_p)

    # Split input and output
    x_train = x[:, 1:train_index]
    y_train = y[:, 1:train_index]
    x_dev = x[:, 1+train_index:dev_index]
    y_dev = y[:, 1+train_index:dev_index]
    x_test = x[:, dev_index+1:test_index]
    y_test = y[:, dev_index+1:test_index]

    # # stack of data
    # train_data = (x_train, y_train)
    # dev_data = (x_dev, y_dev)
    # test_data = (x_test, y_test)
    #
    # # Batching
    # # If shuffle=true, shuffles the observations each time iterations are re-started. If partial=false, drops the last mini-batch if it is smaller than the batchsize.
    train_data = DataLoader(x_train, y_train, batchsize = 50)
    dev_data = DataLoader(x_dev, y_dev, batchsize = 50)
    test_data = DataLoader(x_test, y_test, batchsize = 50)

    return train_data, dev_data, test_data
 end

# Struct to define model
struct PassThroughBlock
    forward
    passthrough
 end
 Flux.@functor PassThroughBlock    # tell Flux this is a Flux-compatible layer
 # first, a convenience constructor
 function PassThroughBlock(;activation = relu)
     Ns = (72, 52, 32, 21, 30, 36)
     Ls = [Dense(Ns[1], Ns[2], activation)]
     Ps = []
     for (n_in, n_out) in zip(Ns[2:end-1], Ns[3:end])
         push!(Ls, Dense(n_in, n_out, activation))
         push!(Ps, Dense(Ns[1], n_out, activation))
     end
     return PassThroughBlock(Chain(Ls)...,  Chain(Ps)...)   # NB: this line is wrong; see the correction further down the thread
  end
 # next, define the forward pass
 function (block::PassThroughBlock)(x)
     Ls = block.forward
     Ps = block.passthrough
     z = Ls[1](x)
     for (l, p) in zip(Ls[2:end], Ps)
         z = l(z) + p(x)
     end
     return z
  end

# average loss over a data loader
function loss_average(dataloader, model)
    l = 0f0
    for (x, y) in dataloader
        l += Flux.mse(model(x), y)
    end
    l / length(dataloader) # mean of the per-batch MSEs
end
# test the loss_average function
# bb = PassThroughBlock()
# loss_average(train_data, bb)

# train the ICNN
function train(; kws...)
    # Initializing Model parameters
    args = Args(; kws...)

    # Load Data
    # (x_train,y_train),(x_dev, y_dev),(x_test,y_test) = get_processed_data()
    train_data, dev_data, test_data = get_processed_data()

    # Construct model
    m = PassThroughBlock()

    # # regularization
    # l2(x) = sum(x .^ 2)
    # l1(x) = sum(abs.(x))

    # loss: mean squared error per sample
    meansquarederror(ŷ, y) = sum((ŷ .- y).^2)/size(y, 2)
    loss(x, y) = meansquarederror(m(x), y)

    ## Training
    # callback  cb

    evalcb = () -> @show(loss_average(train_data,m))

    opt = ADAM(0.0001,(0.9,0.8))

    @epochs args.epochs Flux.train!(
        loss,
        params(m),
        train_data,
        opt,
        cb = throttle(evalcb, 5),
    )

    BSON.@save joinpath(args.savepath, "ICNN.bson") params = cpu.(params(m))

    @show loss_average(dev_data, m)
end
train()

# Testing the model, from saved model
function test(; kws...)
    args = Args(; kws...)

    # Loading the test data
    _, _, test_data = get_processed_data()

    # Re-constructing the model with random initial weights
    m = PassThroughBlock()

    # Loading the saved parameters
    BSON.@load joinpath(args.savepath, "ICNN.bson") params

    # Loading parameters onto the model
    Flux.loadparams!(m, params)


    @show loss_average(test_data, m)
end
test()

Just don't use Flux's built-in train!; write your own. The simplest version of the training loop is no more than:

for epoch in 1:1000
  gs = gradient(() -> loss(x, y), ps)
  update!(opt, ps, gs)
end

But since you want to apply an abs when the gradients gs are applied, all you need to do is modify update!: copy Flux's update! code (https://github.com/FluxML/Flux.jl/blob/master/src/optimise/train.jl#L9) and turn it into your own update_abs!:

using Flux.Optimise: apply!

function update_abs!(x::AbstractArray, x̄)
  x .-= x̄
  broadcast!(abs, x, x) # apply abs to every entry of x in place, without allocating
end

function update_abs!(opt, x, x̄)
  x .-= apply!(opt, x, x̄)
  broadcast!(abs, x, x) # apply abs to every entry of x in place, without allocating
end

function update_abs!(opt, xs::Params, gs)
  for x in xs
    gs[x] === nothing && continue
    update_abs!(opt, x, gs[x])
  end
end
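
Putting it together, a minimal custom loop (just a sketch, reusing the loss, train_data, m, and opt from your code above; note that this projects every parameter in ps, so if you only want certain layers constrained, see the callback approach below):

ps = params(m)
opt = ADAM(0.0001, (0.9, 0.8))
for epoch in 1:5
  for (x, y) in train_data
    gs = gradient(() -> loss(x, y), ps)  # gradients for this mini-batch
    update_abs!(opt, ps, gs)             # Adam step, then force the weights non-negative
  end
  @show loss_average(train_data, m)
end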

Another approach is to use a callback function:

model = Chain([PassThroughBlock((36, 52, 32, 21, 30, 36)) for _ in 1:5]...) # assuming a constructor that takes the layer sizes


evalcb = throttle(() -> @show(loss_average(train_data, model)), 5)
function ensure_positive_weights(model)
    # say we want the weights of every passthrough connection in each PassThroughBlock to be non-negative
    abs_indices = filter(i -> model[i] isa PassThroughBlock, 1:length(model))

    for i in abs_indices
        ps = params(model[i].passthrough)
        foreach(p -> broadcast!(abs, p, p), ps) # or p .= max.(0, p)
    end
end

weights_cb() = ensure_positive_weights(model)
cb() = begin
    weights_cb() # called on every iteration
    evalcb()     # called at most once every 5 seconds
end

Flux.train!(loss, params(model), train_data, opt, cb = cb)
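
After training you can sanity-check that the constraint actually holds, e.g. (a quick sketch against the model above):

for i in filter(i -> model[i] isa PassThroughBlock, 1:length(model))
    @assert all(all(w .>= 0) for w in params(model[i].passthrough))
end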

Also, one spot in the code I gave earlier was wrong: Chain(Ls) wraps the whole vector as a single layer, so the splat has to go inside the call:

function PassThroughBlock(;activation = relu)
     Ns = (72, 52, 32, 21, 30, 36)
     Ls = [Dense(Ns[1], Ns[2], activation)]
     Ps = []
     for (n_in, n_out) in zip(Ns[2:end-1], Ns[3:end])
         push!(Ls, Dense(n_in, n_out, activation))
         push!(Ps, Dense(Ns[1], n_out, activation))
     end
-     return PassThroughBlock(Chain(Ls)...,  Chain(Ps)...)
+     return PassThroughBlock(Chain(Ls...),  Chain(Ps...))
  end

As for batching, I personally like the tools MLDataUtils provides, such as shuffleobs and BatchView. In practice, though, you are quite free to do it however you like:

# train_X 为 WHCN
# ProgressMeter.@showprogress
# MappedArrays.mappedarray

@showprogress 1 "Training..." for epoch in 1:epochs
        # in my example the training set is generated on the fly during training
        X = add_noise(train_X, σ)
        Y = X - train_X

        X, Y = shuffleobs((X, Y))
        X = BatchView(X, batch_size, ObsDim.Last())
        Y = BatchView(Y, batch_size, ObsDim.Last())
        if use_gpu
            model = gpu(model)
            X = mappedarray(CuArray, X) # lazy conversion to GPU
            Y = mappedarray(CuArray, Y)
        end

        Flux.train!(
            loss,
            params(model),
            zip(X, Y), # zip + BatchView together give you DataLoader-like functionality
            opt;
            cb = evalcb,
        )

        bson(joinpath(checkpoint_dir, "model-$(epoch).bson"), model = cpu(model))
    end

How do you make those colored +/- marks?

That's a code block marked as a diff. Apart from that, on GitHub you can leave comments like that with the suggestion feature.


Thank you!! I get it now!!

Much appreciated!! You're the best!

May I ask why this line uses five PassThroughBlocks?
If it's just five forward layers with four passthrough connections, a single block should be enough, right... or have I misunderstood what it means?

Er, it's just a quick simulation of a complete network. A complete network usually contains more than a single PassThroughBlock, and it may also contain other kinds of layers (Dense, softmax, and so on). I'm not sure exactly how a PassThroughBlock network should be assembled, but it looks like you can just stack multiple blocks, the same way ResNet stacks its ResNet blocks.
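
For instance, something like this hypothetical stack (layer sizes made up):

model = Chain(
    PassThroughBlock(),  # 72 -> 36, as defined above
    Dense(36, 10),       # any other layers on top
    softmax,
)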

I see, thanks!!