我之前用 MXNet 学习过一个例子:Python 版本的运行时间比我自己用 Julia 写的版本要长,但准确率更高。(我基本上是把 MXNet 的例子照搬成 Julia 的,但不知道为什么准确率会差一些。)
python代码如下:
import d2lzh as d2l
from mxnet import gluon, init, nd, autograd
from mxnet.gluon import loss as gloss, nn
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
num_epochs = 10
%time d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
结果是:
epoch 1, loss 0.7997, train acc 0.702, test acc 0.821
epoch 2, loss 0.4910, train acc 0.817, test acc 0.853
epoch 3, loss 0.4233, train acc 0.845, test acc 0.860
epoch 4, loss 0.3942, train acc 0.853, test acc 0.862
epoch 5, loss 0.3715, train acc 0.862, test acc 0.872
epoch 6, loss 0.3519, train acc 0.869, test acc 0.872
epoch 7, loss 0.3396, train acc 0.874, test acc 0.875
epoch 8, loss 0.3239, train acc 0.880, test acc 0.880
epoch 9, loss 0.3168, train acc 0.883, test acc 0.864
epoch 10, loss 0.3050, train acc 0.886, test acc 0.879
CPU times: user 31.9 s, sys: 1.65 s, total: 33.6 s
Wall time: 34.7 s
Julia代码:
using Flux
using Flux: @epochs
using Statistics
using Random
using Parameters: @with_kw
using IterTools: ncycle
# Hyperparameters for the training run. `@with_kw` generates a keyword
# constructor, so `Args()` yields the defaults listed below.
@with_kw mutable struct Args
    # Step size passed to the optimiser.
    lr::Float64 = 0.5
    # Number of samples per mini-batch.
    batch_size::Int = 256
    # Number of passes over the training data.
    repeat::Int = 20
end
# Fetch the Fashion-MNIST images and their class labels through Flux's
# bundled dataset helpers.
images, labels = let dataset = Flux.Data.FashionMNIST
    dataset.images(), dataset.labels()
end
"""
    get_fashion_label(labels)

Translate zero-based numeric class labels into their text names.

Each element `i` of `labels` is looked up at position `i + 1` of the
ten-entry name table, so valid labels are the integers `0` through `9`.
"""
function get_fashion_label(labels)
    class_names = [
        "t-shirt", "trouser", "pullover", "dress", "coat",
        "sandal", "shirt", "sneaker", "bag", "ankle boot",
    ]
    # Labels are zero-based while Julia arrays are one-based, hence the shift.
    return collect(class_names[idx + 1] for idx in labels)
end
# Build the feature matrix and label vector from the first `n_samples` images.
# NOTE(review): only 6000 samples are used here (2/3 for training, 1/3 for
# testing). If the Python run trains on the complete dataset (not visible in
# this script), that difference alone could explain part of the accuracy gap;
# confirm before comparing the two results.
n_samples = 6000

# Convert the needed labels to text once. The previous loop re-ran
# `get_fashion_label` over the entire label vector on every iteration.
data_y = get_fashion_label(labels[1:n_samples])

# One flattened 784-element image per column. The buffer is left uninitialised
# (every column is overwritten below) and stored as Float32, which is assumed
# to match the default parameter type of Flux layers.
data_x = Matrix{Float32}(undef, 784, n_samples)
for i in 1:n_samples
    data_x[:, i] = Float32.(vec(images[i]))
end

# One-hot encode the text labels against the sorted list of distinct classes.
label = sort(unique(data_y))
data_onehot_labels = Flux.onehotbatch(data_y, label)

# Interleaved split: samples 1, 2, 4, 5, ... train; samples 3, 6, 9, ... test.
train_idx = [1:3:n_samples; 2:3:n_samples]
test_idx = 3:3:n_samples
train_x = data_x[:, train_idx]
train_y = data_onehot_labels[:, train_idx]
test_x = data_x[:, test_idx]
test_y = data_onehot_labels[:, test_idx]
# Build the hyperparameters once so the loader, the optimiser and the epoch
# count all read the same object. `Args` is mutable, so separate `Args()`
# calls would not see changes made to one another.
args = Args()

# Mini-batch iterator over the training split with shuffling enabled.
train_data = Flux.Data.DataLoader(
    (train_x, train_y); batchsize=args.batch_size, shuffle=true
)

# Model: 784 inputs -> 256-unit ReLU hidden layer -> 10 raw output scores.
model = Chain(
    Dense(784, 256, relu),
    Dense(256, 10),
)

# Loss: cross entropy computed from the raw scores. The chain has no softmax
# layer, so the logit variant is used.
loss(x, y) = Flux.logitcrossentropy(model(x), y)

# Trainable parameters of the model.
ps = Flux.params(model)

# SGD: plain gradient descent with learning rate `args.lr`.
opt = Descent(args.lr)

# `ncycle` replays the loader `args.repeat` times, i.e. that many epochs.
# On a first call the `@time` figure also includes compilation time.
@time Flux.train!(loss, ps, ncycle(train_data, args.repeat), opt)
# Fraction of columns of `x` for which the model's highest-scoring class
# agrees with the class encoded in the matching column of `y`.
function accuracy(x, y, model)
    predicted = Flux.onecold(model(x))
    expected = Flux.onecold(y)
    return Flux.mean(predicted .== expected)
end
# Report classification accuracy on both splits. The values come from
# `accuracy`, so they are labelled "acc" rather than "loss".
print(
    "train acc: ", accuracy(train_x, train_y, model),
    ", test acc: ", accuracy(test_x, test_y, model),
)
结果:
12.571922 seconds (314.00 M allocations: 6.982 GiB, 6.54% gc time)
train loss: 0.8555, test loss:0.8285