# 我要怎么优化这个算法？

Python 代码如下：

``````python
import d2lzh as d2l
from mxnet import gluon, init, nd, autograd
from mxnet.gluon import loss as gloss, nn

net = nn.Sequential()
net.initialize(init.Normal(sigma=0.01))
batch_size = 256
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
num_epochs = 10
%time d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
``````

``````
epoch 1, loss 0.7997, train acc 0.702, test acc 0.821
epoch 2, loss 0.4910, train acc 0.817, test acc 0.853
epoch 3, loss 0.4233, train acc 0.845, test acc 0.860
epoch 4, loss 0.3942, train acc 0.853, test acc 0.862
epoch 5, loss 0.3715, train acc 0.862, test acc 0.872
epoch 6, loss 0.3519, train acc 0.869, test acc 0.872
epoch 7, loss 0.3396, train acc 0.874, test acc 0.875
epoch 8, loss 0.3239, train acc 0.880, test acc 0.880
epoch 9, loss 0.3168, train acc 0.883, test acc 0.864
epoch 10, loss 0.3050, train acc 0.886, test acc 0.879
CPU times: user 31.9 s, sys: 1.65 s, total: 33.6 s
Wall time: 34.7 s
``````

Julia代码：

``````julia
using Flux
using Flux: @epochs
using Statistics
using Random
using Parameters: @with_kw
using IterTools: ncycle

# Hyperparameters for the training run; every field has a keyword default, so
# `Args()` yields the standard configuration.
@with_kw mutable struct Args
    # Step size handed to the optimiser.
    lr::Float64 = 0.5
    # Number of samples per mini-batch.
    batch_size::Int = 256
    # How many times the training batches are cycled through.
    repeat::Int = 20
end

# Fetch the Fashion-MNIST pictures and their class indices through Flux's
# bundled dataset helpers.
images, labels = Flux.Data.FashionMNIST.images(), Flux.Data.FashionMNIST.labels()

"""
    get_fashion_label(labels)

Return the Fashion-MNIST class name for every numeric label in `labels`.

Labels are zero-based (`0` maps to "t-shirt", `9` to "ankle boot"), so each one
is shifted by one before indexing the name table. A label outside `0:9` raises
a `BoundsError`.
"""
function get_fashion_label(labels)
    class_names = [
        "t-shirt",
        "trouser",
        "pullover",
        "dress",
        "coat",
        "sandal",
        "shirt",
        "sneaker",
        "bag",
        "ankle boot",
    ]
    return [class_names[code + 1] for code in labels]
end

# Build the feature matrix and label vector from the first 6000 samples.

# One flattened image (784 pixels) per column. Float32 matches the default
# parameter type of Flux layers, so the forward and backward passes avoid
# mixed-precision arithmetic. The buffer is left uninitialised because every
# column is overwritten in the loop below.
data_x = Matrix{Float32}(undef, 784, 6000)

for i in axes(data_x, 2)
    data_x[:, i] = Float32.(vec(images[i]))
end

# Translate the numeric labels to class names once. Calling
# `get_fashion_label(labels)` inside the loop would rebuild the whole name list
# on every iteration, and an untyped `[]` would make this a `Vector{Any}`.
data_y = get_fashion_label(labels[1:6000])

# The distinct class names, in sorted order, define the rows of the one-hot
# encoding.
label = sort!(unique(data_y))
data_onehot_labels = Flux.onehotbatch(data_y, label)

# Every third sample is held out for testing; the other two thirds are used
# for training.
test_x = data_x[:, 3:3:6000]
test_y = data_onehot_labels[:, 3:3:6000]

train_x = data_x[:, vcat(1:3:6000, 2:3:6000)]
train_y = data_onehot_labels[:, vcat(1:3:6000, 2:3:6000)]

# Shuffled mini-batches of (features, one-hot labels) for training.
train_data = Flux.Data.DataLoader(
    (train_x, train_y); batchsize=Args().batch_size, shuffle=true
)

# Network: 784 inputs -> 256 hidden units with ReLU -> 10 raw output scores.
model = Chain(Dense(784, 256, relu), Dense(256, 10))

# Training objective: cross entropy computed from the model's raw scores
# (logits) for `x` against the targets `y`.
function loss(x, y)
    logits = model(x)
    return Flux.logitcrossentropy(logits, y)
end

# SGD: plain gradient descent with the configured learning rate.
opt = Descent(Args().lr)

# Trainable parameters of the model.
ps = Flux.params(model)

# Cycle through the training batches `Args().repeat` times; `@time` reports the
# elapsed time and allocations of the whole call.
@time Flux.train!(loss, ps, ncycle(train_data, Args().repeat), opt)

# Fraction of columns for which the model's highest-scoring class equals the
# class encoded in `y`.
function accuracy(x, y, model)
    predicted = Flux.onecold(model(x))
    actual = Flux.onecold(y)
    return Flux.mean(predicted .== actual)
end

# Report classification accuracy on both splits. These values come from
# `accuracy`, so they are labelled as accuracies rather than losses.
println(
    "train acc: ", accuracy(train_x, train_y, model),
    ", test acc: ", accuracy(test_x, test_y, model),
)
``````

``````
12.571922 seconds (314.00 M allocations: 6.982 GiB, 6.54% gc time)
train loss: 0.8555, test loss:0.8285
``````