As always, to avoid wasting time, please learn your machine-learning basics the way I suggested at http://zen747.blogspot.tw/2018/03/i.html.
/* * This sample use gradient descend to classify a spiral dataset * http://cs231n.github.io/neural-networks-case-study/ */ #include <vector> #include <deque> #include <algorithm> #include <iostream> #include <cmath> #include <cassert> #include <ctime> #include <cfloat> #include "MesenneTwister.h" #define USE_SDL 1 #if USE_SDL #include <SDL2/SDL.h> #include <SDL2/SDL_image.h> #include <SDL2/SDL_ttf.h> #endif using namespace std; // The regularization scale the weights toward 0 #define REGULARIZATION_FACTOR 1e-2 #define DROPOUT_RATE 0.25 #define INPUT_DROPOUT_RATE 0.001 #define MAX_NORM 10 #define LEARNING_RATE 0.02 #define BATCH_SIZE 20 #define LEAKY_RELU_CONSTANT 0.005 const double epsilon = 1e-8; MTRand g_rng(time(NULL)); // used in adam algorithm int g_generation = 0; //-------------------------------- template <int ACTIVATE_FUNC_TYPE> double activate (double x) { return 0; } template <int ACTIVATE_FUNC_TYPE> double activate_drv (double y) { return 0; } enum { SIGMOID, TANH, RELU }; template<> double activate<SIGMOID>(double x) { return 1.0 / (1.0 + exp(-x)); } template<> double activate_drv<SIGMOID> (double y) { return y * (1.0 - y); } template<> double activate<TANH>(double x) { return tanh (x); } template<> double activate_drv<TANH> (double y) { return 1.0 - y * y; } template<> double activate<RELU>(double x) { return (x > 0) ? x : LEAKY_RELU_CONSTANT * x; } template<> double activate_drv<RELU>(double y) { return (y > 0) ? 1.0 : LEAKY_RELU_CONSTANT; } //--------- double activate_func (double x) { return activate<RELU>(x); } double activate_drv_func (double y) { return activate_drv<RELU>(y); } //----------- class Layer; struct NetworkConfig { bool training_; NetworkConfig() : training_(false) {} }; class Neuron { public: Neuron (int numInputs=0); void init_weights (double mean, double variance, double magnitude); double calculate_output_from (vector<double> const&inputs); void calculate_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, double drvLoss); void calculate_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, const Layer &output_layer); const vector<double> & weights() const; size_t set_input_weights(const double *weights); void clear_pdrvs (); void update_weights (double learning_rate, double momentum, bool reguon); public: int idx_; vector<double> inputWeights_; double value_; bool droped_out_; vector<double> deltaWeight_; vector<double> momentum_; double pdrvErrorWrtInput_; vector<double> pdrvErrorWrtWeights_; }; class Layer { public: Layer(NetworkConfig *netconfig, int numNeurons, int numInputs); ~Layer(); void init_weights (double mean, double variance, double magnitude); void forward_pass(const Layer &last_layer); void calculate_output_layer_pdrv_of_error_wrt_input_weights (vector<double> const&inputs, vector<double> const&outputsExp, double totalExp, int right_ans ); void calculate_middle_layer_pdrv_of_error_wrt_input_weights (vector<double> const&inputs, const Layer &output_layer); void save_weights_to(vector<double> &weights) const; size_t set_input_weights(const double *weights); const vector<double> & get_outputs() const; void set_outputs_of_input_layer(const vector<double> &value); size_t num_of_neurons () const; void clear_pdrvs (); void update_weights (double learning_rate, double momentum, bool reguon); void mark_dropout (double dropout_rate); void mark_no_dropout (); public: vector<Neuron> nodes_; vector<double> outputs_; NetworkConfig *netconfig_; bool output_layer_; }; Neuron::Neuron(int numInputs) : droped_out_(false) { 
inputWeights_.resize(numInputs+1); // plus weight for bias } void Neuron::init_weights(double mean, double variance, double magnitude) { // This initialization technic helps a lot double sqrtn = sqrt(2.0 * inputWeights_.size()); for (size_t i=0; i < inputWeights_.size()-1; ++i) { inputWeights_[i] = magnitude * g_rng.randNorm(mean, variance) / sqrtn; } inputWeights_.back() = 0; } double Neuron::calculate_output_from (const vector< double >& inputs) { assert (inputs.size()+1 == inputWeights_.size() && "number of inputs wrong"); if (droped_out_) { value_ = 0; return 0; } double v=0; for (size_t i=0; i < inputs.size(); ++i) { v += inputs[i] * inputWeights_[i]; } v += inputWeights_.back(); // bias value_ = activate_func(v); return value_; } void Neuron::calculate_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, double drvLoss) { pdrvErrorWrtInput_ = drvLoss * activate_drv_func(value_); size_t i=0; for (; i < pdrvErrorWrtWeights_.size() - 1; ++i) { pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * inputs[i]; } pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * 1.0; // and bias which is 1.0 } void Neuron::calculate_pdrv_of_error_wrt_input_weights(const vector< double >& inputs, const Layer& output_layer) { if (droped_out_) { pdrvErrorWrtInput_ = 0; return; } double d = 0; for (size_t i=0; i < output_layer.num_of_neurons(); ++i) { d += output_layer.nodes_[i].inputWeights_[idx_] * output_layer.nodes_[i].pdrvErrorWrtInput_; } pdrvErrorWrtInput_ = d * activate_drv_func(value_); size_t i=0; for (; i< pdrvErrorWrtWeights_.size() - 1; ++i) { pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * inputs[i]; } pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * 1.0; // and bias which is 1.0 } const vector< double >& Neuron::weights() const { return inputWeights_; } size_t Neuron::set_input_weights(const double* weights) { inputWeights_.assign(weights, weights + inputWeights_.size()); pdrvErrorWrtWeights_.resize(inputWeights_.size()); deltaWeight_.resize(inputWeights_.size()); momentum_.resize(inputWeights_.size()); return inputWeights_.size(); } void Neuron::clear_pdrvs() { pdrvErrorWrtWeights_.clear(); pdrvErrorWrtWeights_.resize(inputWeights_.size()); } void Neuron::update_weights(double learning_rate, double momentum, bool reguon) { double weight_norm = 0; for (size_t i=0; i < inputWeights_.size(); ++i) { double dx = pdrvErrorWrtWeights_[i]; double regularization = 0; if (reguon) { regularization = learning_rate * REGULARIZATION_FACTOR * inputWeights_[i]; } /* RMSprop double decay_rate = 0.99; deltaWeight_[i] = decay_rate * deltaWeight_[i] + (1.0 - decay_rate) * dx * dx; inputWeights_[i] += -learning_rate * dx / (sqrt(deltaWeight_[i]) + epsilon) - regularization; //*/ //* Adam double beta1=0.9; double beta2=0.999; momentum_[i] = beta1*momentum_[i] + (1.0 - beta1) * dx; double mt = momentum_[i] / (1.0 - pow(beta1, g_generation)); //double mt = momentum_[i]; deltaWeight_[i] = beta2 * deltaWeight_[i] + (1.0 - beta2) * dx * dx; double vt = deltaWeight_[i] / (1.0 - pow(beta2, g_generation)); //double vt = deltaWeight_[i]; inputWeights_[i] += -learning_rate * mt / (sqrt(vt) + epsilon) - regularization; //*/ weight_norm += inputWeights_[i] * inputWeights_[i]; } weight_norm = sqrt(weight_norm); if (weight_norm > MAX_NORM) { double r = MAX_NORM / weight_norm; for (size_t i=0; i < inputWeights_.size(); ++i) { inputWeights_[i] *= r; } } } Layer::Layer(NetworkConfig *netconfig, int numNeurons=0, int numInputsPerNeuron=0) : netconfig_(netconfig) , output_layer_(false) { nodes_.resize(numNeurons); for (size_t i=0; i < 
nodes_.size(); ++i) { nodes_[i] = Neuron(numInputsPerNeuron); nodes_[i].idx_ = i; } outputs_.resize(numNeurons); } Layer::~Layer() { nodes_.clear(); } void Layer::init_weights(double mean, double variance, double magnitude) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].init_weights (mean, variance, magnitude); } } void Layer::forward_pass(const Layer &last_layer) { double rate = 1.0 - DROPOUT_RATE; const vector<double> & inputs = last_layer.get_outputs(); for (size_t i=0; i < nodes_.size(); ++i) { if (nodes_[i].droped_out_) { outputs_[i] = nodes_[i].value_ = 0; } else { nodes_[i].calculate_output_from (inputs); if (!output_layer_ && !netconfig_->training_) { nodes_[i].value_ *= rate; } outputs_[i] = nodes_[i].value_; } } } void Layer::calculate_output_layer_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, vector<double> const&outputsExp, double totalExp, int right_ans ) { assert (outputsExp.size() == nodes_.size()); for (size_t i=0; i < nodes_.size(); ++i) { double drvLoss; if (i == right_ans) { drvLoss = outputsExp[i] / totalExp -1.0; } else { drvLoss = outputsExp[i] / totalExp; } nodes_[i].calculate_pdrv_of_error_wrt_input_weights (inputs, drvLoss); } } void Layer::calculate_middle_layer_pdrv_of_error_wrt_input_weights(const vector< double >& inputs, const Layer& output_layer) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].calculate_pdrv_of_error_wrt_input_weights (inputs, output_layer); } } void Layer::save_weights_to(vector< double >& weights) const { for (size_t i=0; i < nodes_.size(); ++i) { const vector<double> &w = nodes_[i].weights(); weights.insert(weights.end(), w.begin(), w.end()); } } size_t Layer::set_input_weights(const double* weights) { size_t pos = 0; for (size_t i=0; i < nodes_.size(); ++i) { pos += nodes_[i].set_input_weights(weights + pos); } return pos; } void Layer::set_outputs_of_input_layer(const vector< double >& value) { outputs_ = value; for (size_t i=0; i < nodes_.size(); ++i) { if (netconfig_->training_ && nodes_[i].droped_out_) { nodes_[i].value_ = outputs_[i] = 0; } else { nodes_[i].value_ = outputs_[i]; } } } const vector< double >& Layer::get_outputs() const { return outputs_; } size_t Layer::num_of_neurons() const { return nodes_.size(); } void Layer::clear_pdrvs() { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].clear_pdrvs(); } } void Layer::update_weights(double learning_rate, double momentum, bool reguon) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].update_weights(learning_rate, momentum, reguon); } } void Layer::mark_dropout(double dropout_rate) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].droped_out_ = (g_rng.rand() < dropout_rate); } } void Layer::mark_no_dropout() { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].droped_out_ = false; } } class NeuralNetwork { public: NeuralNetwork(int numInputs, vector<int> const& hiddenLayerTopology, int numOutpus); ~NeuralNetwork(); void init_weights(double mean_value, double variance_value, double magnitude); size_t number_of_weights () const; void run (vector<double> const &inputs, vector<double> &outputs); void train (vector<double> const &inputs, vector<double> &outputs); void forward_pass (vector<double> const &inputs); void backward_pass (vector<double> &outputs, int right_ans); void save_weights_to(vector<double> &weights) const; void set_weights(vector<double> const &weights); void clear_pdrvs (); void update_weights (double learning_rate, double momentum); double error () const { return last_error_; } bool regularization_on () const { return 
regularization_on_; } void set_regularization_on () { regularization_on_ = true; } protected: void feed_forward (); void mark_dropout (); void mark_no_dropout (); private: vector<Layer *> layers_; double last_error_; NetworkConfig *netconfig_; bool regularization_on_; NeuralNetwork(const NeuralNetwork &rhs) {} NeuralNetwork &operator=(const NeuralNetwork &rhs) { return *this; } }; NeuralNetwork::NeuralNetwork(int nInputs, const vector< int >& hiddenLayerTopology, int nOutputs) : last_error_(0) , netconfig_(new NetworkConfig) , regularization_on_(false) { layers_.resize(hiddenLayerTopology.size() + 2); // hidden + inputs + outputs int nNeuron = nInputs; layers_[0] = new Layer(netconfig_, nNeuron, 0); // layer zero is inputs for (size_t i=0; i < hiddenLayerTopology.size(); ++i) { layers_[i+1] = new Layer(netconfig_, hiddenLayerTopology[i], layers_[i]->num_of_neurons()); } nNeuron = nOutputs; layers_.back() = new Layer(netconfig_, nNeuron, hiddenLayerTopology.back()); } NeuralNetwork::~NeuralNetwork() { delete netconfig_; for (size_t i=0; i < layers_.size(); ++i) { delete layers_[i]; } layers_.clear(); } void NeuralNetwork::init_weights(double mean, double variance, double magnitude) { for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->init_weights(mean, variance, magnitude); } } size_t NeuralNetwork::number_of_weights() const { vector<double> weights; this->save_weights_to(weights); return weights.size(); } void NeuralNetwork::run(const vector< double >& inputs, vector< double >& outputs) { if (netconfig_->training_) { netconfig_->training_ = false; mark_no_dropout (); } forward_pass(inputs); outputs = layers_.back()->get_outputs(); } void NeuralNetwork::train(const vector< double >& inputs, vector< double >& outputs) { if (!netconfig_->training_) { netconfig_->training_ = true; mark_dropout (); } forward_pass(inputs); outputs = layers_.back()->get_outputs(); } void NeuralNetwork::forward_pass(const vector< double >& inputs) { // feed forward // set inputs as outputs of first layer layers_[0]->set_outputs_of_input_layer(inputs); feed_forward (); } void NeuralNetwork::backward_pass(vector< double >& outputs, int right_ans) { // calculate errors/loss double denom = 0; double numer = 0; last_error_ = 0; double max_out=-DBL_MAX; for (size_t i=0; i < outputs.size(); ++i) { if (outputs[i] > max_out) { max_out = outputs[i]; } } vector<double> outputsExp; outputsExp.reserve(outputs.size()); double totalExp = 0; for (size_t i=0; i < outputs.size(); ++i) { outputsExp.push_back(exp(outputs[i] - max_out)); if (i == right_ans) { numer = outputsExp.back(); } denom += outputsExp.back(); } totalExp = denom; last_error_ = -log(numer / denom); // calculate partial derivatives // first the last layer(output layer) vector< double > layer_inputs = layers_[layers_.size()-2]->get_outputs(); layers_.back()->calculate_output_layer_pdrv_of_error_wrt_input_weights(layer_inputs, outputsExp, totalExp, right_ans); // back propagate for (int i=layers_.size()-2; i > 0; --i) { vector< double > layer_inputs = layers_[i-1]->get_outputs(); layers_[i]->calculate_middle_layer_pdrv_of_error_wrt_input_weights(layer_inputs, *layers_[i+1]); } } void NeuralNetwork::feed_forward() { for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->forward_pass(*layers_[i-1]); } } void NeuralNetwork::mark_dropout() { for (size_t i=0; i < layers_.size()-1; ++i) { double r = DROPOUT_RATE; if (i==0) r = INPUT_DROPOUT_RATE; layers_[i]->mark_dropout(r); } } void NeuralNetwork::mark_no_dropout() { for (size_t i=0; i < layers_.size()-1; ++i) { 
layers_[i]->mark_no_dropout(); } } void NeuralNetwork::clear_pdrvs() { for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->clear_pdrvs(); } } void NeuralNetwork::update_weights(double learning_rate, double momentum) { assert (netconfig_->training_ && "why update weights when not training?"); // update weights for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->update_weights(learning_rate, momentum, regularization_on_); } mark_dropout(); clear_pdrvs(); } void NeuralNetwork::save_weights_to(vector< double >& weights) const { weights.clear(); for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->save_weights_to (weights); } } void NeuralNetwork::set_weights(const vector< double >& weights) { int pos = 0; for (size_t i=1; i < layers_.size(); ++i) { pos += layers_[i]->set_input_weights (&weights[pos]); } } struct Genome { double fitness_; vector<double> genes_; Genome(size_t numGenes=0); void copy_genes_from (Genome const&source, size_t from_index, size_t to_index, size_t replace_start_index, double mutate_rate); void report () const; bool operator<(const Genome &rhs) const { return fitness_ > rhs.fitness_; } }; Genome::Genome(size_t numGenes) { assert (numGenes && "number of genes can't be zero"); genes_.resize(numGenes); } void Genome::copy_genes_from(const Genome& source, size_t from_index, size_t to_index, size_t replace_start_index, double mutate_rate) { for (size_t i=from_index,cgene=0; i < to_index; ++i,++cgene) { genes_[replace_start_index+cgene] = source.genes_[i]; } } void Genome::report() const { cout << "weights "; for (size_t i=0; i < genes_.size(); ++i) { cout << genes_[i] << " "; } } struct Point { double x_, y_; int class_; }; const int nClass = 3; struct DataSet { vector<Point> samples_; void randomize () { vector<Point> sam; while (!samples_.empty()) { int idx = g_rng.randInt() % samples_.size(); sam.push_back(samples_[idx]); samples_[idx] = samples_.back(); samples_.pop_back(); } sam.swap(samples_); } }; #if USE_SDL #define RENDER_WINDOW_WIDTH 600 #define RENDER_WINDOW_HEIGHT 600 struct Color { Uint8 r_, g_, b_; Color(Uint8 r,Uint8 g, Uint8 b) : r_(r), g_(g), b_(b) { } }; class Renderer { SDL_Window *win_; SDL_Renderer *ren_; SDL_Texture *tex_; int init_sdl () { if (SDL_Init(SDL_INIT_VIDEO) != 0){ std::cout << "SDL_Init Error: " << SDL_GetError() << std::endl; return 1; } win_ = SDL_CreateWindow("Hello World!", 100, 100, RENDER_WINDOW_WIDTH, RENDER_WINDOW_HEIGHT, SDL_WINDOW_SHOWN); if (win_ == 0){ std::cout << "SDL_CreateWindow Error: " << SDL_GetError() << std::endl; SDL_Quit(); return 1; } ren_ = SDL_CreateRenderer(win_, -1, SDL_RENDERER_ACCELERATED | SDL_RENDERER_PRESENTVSYNC); if (ren_ == 0){ SDL_DestroyWindow(win_); std::cout << "SDL_CreateRenderer Error: " << SDL_GetError() << std::endl; SDL_Quit(); return 1; } std::string imagePath = "circle.png"; tex_ = IMG_LoadTexture(ren_, imagePath.c_str()); if (tex_ == 0) { SDL_DestroyRenderer(ren_); SDL_DestroyWindow(win_); std::cout << "SDL_CreateTextureFromSurface Error: " << SDL_GetError() << std::endl; SDL_Quit(); return 1; } return 0; } public: Renderer () : win_(0), ren_(0), tex_(0) { } ~Renderer () { if (tex_) SDL_DestroyTexture(tex_); if (ren_) SDL_DestroyRenderer(ren_); if (win_) SDL_DestroyWindow(win_); SDL_Quit(); } int start () { return init_sdl (); } int render (DataSet const&sData) { SDL_SetRenderDrawColor(ren_, 255, 255, 255, 255); SDL_RenderClear(ren_); SDL_Rect dst; dst.w = dst.h = 16; int offX = dst.w/2, offY=dst.h/2; vector<Color> cvClass; cvClass.push_back(Color(255,0,0)); 
cvClass.push_back(Color(255,255,0)); cvClass.push_back(Color(0,255,0)); cvClass.push_back(Color(0,0,255)); for (size_t i=0; i < sData.samples_.size(); ++i) { const Color &color = cvClass[sData.samples_[i].class_]; SDL_SetTextureColorMod(tex_, color.r_, color.g_, color.b_); dst.x = RENDER_WINDOW_WIDTH * (0.5 + sData.samples_[i].x_) - offX; dst.y = RENDER_WINDOW_HEIGHT * (0.5 + sData.samples_[i].y_) - offY; SDL_RenderCopy(ren_, tex_, NULL, &dst); } //Update the screen SDL_RenderPresent(ren_); //SDL_Delay(3000); return 0; } }; #endif void generate_data_set (DataSet &sData, int nPointsPerClass) { int nC = nClass + 1; for (int iC=0; iC < nClass; ++iC) { for (int iP=0; iP < nPointsPerClass; ++iP) { Point p; p.class_ = iC; double radius = 0.001 + double(iP) / nPointsPerClass; double angle = nC * iC + 1.0 * nC * radius + g_rng.randNorm() * 0.2; p.x_ = 0.5 * radius * sin(angle); p.y_ = 0.5 * radius * cos(angle); sData.samples_.push_back(p); } } } bool answer_correct (vector<double> const&outputs, int right_ans) { double max_term=-DBL_MAX; int ans = -1; for (size_t i=0; i < outputs.size(); ++i) { if (outputs[i] > max_term) { max_term = outputs[i]; ans = i; } } return ans == right_ans; } const int numInputPoints = 2; const int numOutputPoints = 4; int test_check (NeuralNetwork &network) { vector<double> inputs(numInputPoints); vector<double> outputs(numOutputPoints); cout << "test... "; DataSet sDataTest; generate_data_set (sDataTest, 100); int correct_answer = 0; for (size_t i=0; i < sDataTest.samples_.size(); ++i) { inputs.clear(); inputs.push_back(sDataTest.samples_[i].x_); inputs.push_back(sDataTest.samples_[i].y_); network.run(inputs, outputs); //expects.clear(); //load_expects (expects, sDataTest.samples_[i].class_); //network.backward_pass(outputs, sDataTest.samples_[i].class_); if (answer_correct(outputs, sDataTest.samples_[i].class_)) { ++correct_answer; } else { //cout << i << " at (" << sDataTest.samples_[i].x_ << "," <<sDataTest.samples_[i].x_ << ") "; //cout << "value " << outputs[0] << "," << outputs[1] << "," << outputs[2] << " vs " << sDataTest.samples_[i].class_ << "\n"; } } cout << "precision: " << (100.0 * double(correct_answer) / sDataTest.samples_.size()) << "%" << endl; return correct_answer; } int main() { int iSeed = time(NULL)%1000000; //iSeed = 7; g_rng.seed(iSeed); cout << "use seed " << iSeed << endl; DataSet sData; generate_data_set (sData, 100); sData.randomize(); #if USE_SDL Renderer renderer; if (renderer.start() != 0) { cout << "SDL2 initialization failed." 
<< endl; return 1; } renderer.render(sData); #endif vector<int> hidden_layers_topology; // hidden layers, from input to output hidden_layers_topology.push_back(160); /* hidden_layers_topology.push_back(30); hidden_layers_topology.push_back(30); //*/ NeuralNetwork network(numInputPoints, hidden_layers_topology, numOutputPoints); network.init_weights(0, 1.0, 1.0); const size_t numGenes = network.number_of_weights(); Genome entity(numGenes); network.save_weights_to(entity.genes_); vector<double> inputs(numInputPoints); vector<double> outputs(numOutputPoints); network.set_weights(entity.genes_); const double learning_rate = LEARNING_RATE; const double momentum = 0.9; const int batch_size = BATCH_SIZE; while (g_generation < 500) { // cout << "generation " << g_generation << " "; ++g_generation; int run_count = 0; double max_error = 0; int correct_answer = 0; for (size_t i=0; i < sData.samples_.size(); ++i) { inputs.clear(); inputs.push_back(sData.samples_[i].x_); inputs.push_back(sData.samples_[i].y_); network.train(inputs, outputs); if (answer_correct(outputs, sData.samples_[i].class_)) { ++correct_answer; } network.backward_pass(outputs, sData.samples_[i].class_); max_error = network.error() > max_error ? network.error() : max_error; ++run_count; if (run_count >= batch_size) { network.update_weights(learning_rate, momentum); run_count = 0; } } if (run_count != 0) { network.update_weights(learning_rate, momentum); run_count = 0; } if (g_generation % 10 == 0) { int correct = test_check(network); if (!network.regularization_on () && double(correct_answer - correct) / correct_answer > 0.05 ) { network.set_regularization_on (); cout << "\n\t\t**** regularization on ****\n" << endl; } } } test_check(network); network.save_weights_to(entity.genes_); double weights = 0; for (size_t i=0; i < entity.genes_.size(); ++i) { weights += fabs(entity.genes_[i]); } cout << "(" << entity.genes_.size() << ") weights\n"; cout << "|totalWeights| = " << weights << "\n"; entity.report(); cout << "\n"; return 0; }
This toy example needs only a few epochs to reach good precision. If you don't have SDL installed, set #define USE_SDL 0 to build without the rendering code; on a Linux system this should be very simple to do.
/* * a fizz buzz example * http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/ */ #include <vector> #include <deque> #include <algorithm> #include <iostream> #include <cmath> #include <cassert> #include <ctime> #include <cfloat> #include "MesenneTwister.h" using namespace std; // The regularization scale the weights toward 0 #define REGULARIZATION_FACTOR 1e-2 #define DROPOUT_RATE 0.1 #define INPUT_DROPOUT_RATE 0.0 #define MAX_NORM 10 #define LEARNING_RATE 0.001 #define BATCH_SIZE 32 #define LEAKY_RELU_CONSTANT 0.005 const double epsilon = 1e-8; MTRand g_rng(time(NULL)); // used in adam algorithm int g_generation = 0; //-------------------------------- template <int ACTIVATE_FUNC_TYPE> double activate (double x) { return 0; } template <int ACTIVATE_FUNC_TYPE> double activate_drv (double y) { return 0; } enum { SIGMOID, TANH, RELU }; template<> double activate<SIGMOID>(double x) { return 1.0 / (1.0 + exp(-x)); } template<> double activate_drv<SIGMOID> (double y) { return y * (1.0 - y); } template<> double activate<TANH>(double x) { return tanh (x); } template<> double activate_drv<TANH> (double y) { return 1.0 - y * y; } template<> double activate<RELU>(double x) { return (x > 0) ? x : LEAKY_RELU_CONSTANT * x; } template<> double activate_drv<RELU>(double y) { return (y > 0) ? 1.0 : LEAKY_RELU_CONSTANT; } //--------- double activate_func (double x) { return activate<RELU>(x); } double activate_drv_func (double y) { return activate_drv<RELU>(y); } //----------- class Layer; struct NetworkConfig { bool training_; NetworkConfig() : training_(false) {} }; class Neuron { public: Neuron (int numInputs=0); void init_weights (double mean, double variance, double magnitude); double calculate_output_from (vector<double> const&inputs); void calculate_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, double drvLoss); void calculate_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, const Layer &output_layer); const vector<double> & weights() const; size_t set_input_weights(const double *weights); void clear_pdrvs (); void update_weights (double learning_rate, double momentum, bool reguon); public: int idx_; vector<double> inputWeights_; double value_; bool droped_out_; vector<double> deltaWeight_; vector<double> momentum_; double pdrvErrorWrtInput_; vector<double> pdrvErrorWrtWeights_; }; class Layer { public: Layer(NetworkConfig *netconfig, int numNeurons, int numInputs); ~Layer(); void init_weights (double mean, double variance, double magnitude); void forward_pass(const Layer &last_layer); void calculate_output_layer_pdrv_of_error_wrt_input_weights (vector<double> const&inputs, vector<double> const&outputsExp, double totalExp, int right_ans ); void calculate_middle_layer_pdrv_of_error_wrt_input_weights (vector<double> const&inputs, const Layer &output_layer); void save_weights_to(vector<double> &weights) const; size_t set_input_weights(const double *weights); const vector<double> & get_outputs() const; void set_outputs_of_input_layer(const vector<double> &value); size_t num_of_neurons () const; void clear_pdrvs (); void update_weights (double learning_rate, double momentum, bool reguon); void mark_dropout (double dropout_rate); void mark_no_dropout (); public: vector<Neuron> nodes_; vector<double> outputs_; NetworkConfig *netconfig_; bool output_layer_; }; Neuron::Neuron(int numInputs) : droped_out_(false) { inputWeights_.resize(numInputs+1); // plus weight for bias } void Neuron::init_weights(double mean, double variance, double magnitude) { // This initialization technic 
helps a lot double sqrtn = sqrt(2.0 * inputWeights_.size()); for (size_t i=0; i < inputWeights_.size()-1; ++i) { inputWeights_[i] = magnitude * g_rng.randNorm(mean, variance) / sqrtn; } inputWeights_.back() = 0; } double Neuron::calculate_output_from (const vector< double >& inputs) { assert (inputs.size()+1 == inputWeights_.size() && "number of inputs wrong"); if (droped_out_) { value_ = 0; return 0; } double v=0; for (size_t i=0; i < inputs.size(); ++i) { v += inputs[i] * inputWeights_[i]; } v += inputWeights_.back(); // bias value_ = activate_func(v); return value_; } void Neuron::calculate_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, double drvLoss) { pdrvErrorWrtInput_ = drvLoss * activate_drv_func(value_); size_t i=0; for (; i < pdrvErrorWrtWeights_.size() - 1; ++i) { pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * inputs[i]; } pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * 1.0; // and bias which is 1.0 } void Neuron::calculate_pdrv_of_error_wrt_input_weights(const vector< double >& inputs, const Layer& output_layer) { if (droped_out_) { pdrvErrorWrtInput_ = 0; return; } double d = 0; for (size_t i=0; i < output_layer.num_of_neurons(); ++i) { d += output_layer.nodes_[i].inputWeights_[idx_] * output_layer.nodes_[i].pdrvErrorWrtInput_; } pdrvErrorWrtInput_ = d * activate_drv_func(value_); size_t i=0; for (; i< pdrvErrorWrtWeights_.size() - 1; ++i) { pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * inputs[i]; } pdrvErrorWrtWeights_[i] += pdrvErrorWrtInput_ * 1.0; // and bias which is 1.0 } const vector< double >& Neuron::weights() const { return inputWeights_; } size_t Neuron::set_input_weights(const double* weights) { inputWeights_.assign(weights, weights + inputWeights_.size()); pdrvErrorWrtWeights_.resize(inputWeights_.size()); deltaWeight_.resize(inputWeights_.size()); momentum_.resize(inputWeights_.size()); return inputWeights_.size(); } void Neuron::clear_pdrvs() { pdrvErrorWrtWeights_.clear(); pdrvErrorWrtWeights_.resize(inputWeights_.size()); } void Neuron::update_weights(double learning_rate, double momentum, bool reguon) { double weight_norm = 0; for (size_t i=0; i < inputWeights_.size(); ++i) { double dx = pdrvErrorWrtWeights_[i]; double regularization = 0; if (reguon) { regularization = learning_rate * REGULARIZATION_FACTOR * inputWeights_[i]; } /* RMSprop double decay_rate = 0.99; deltaWeight_[i] = decay_rate * deltaWeight_[i] + (1.0 - decay_rate) * dx * dx; inputWeights_[i] += -learning_rate * dx / (sqrt(deltaWeight_[i]) + epsilon) - regularization; //*/ //* Adam double beta1=0.9; double beta2=0.999; momentum_[i] = beta1*momentum_[i] + (1.0 - beta1) * dx; double mt = momentum_[i] / (1.0 - pow(beta1, g_generation)); //double mt = momentum_[i]; deltaWeight_[i] = beta2 * deltaWeight_[i] + (1.0 - beta2) * dx * dx; double vt = deltaWeight_[i] / (1.0 - pow(beta2, g_generation)); //double vt = deltaWeight_[i]; inputWeights_[i] += -learning_rate * mt / (sqrt(vt) + epsilon) - regularization; //*/ weight_norm += inputWeights_[i] * inputWeights_[i]; } weight_norm = sqrt(weight_norm); if (weight_norm > MAX_NORM) { double r = MAX_NORM / weight_norm; for (size_t i=0; i < inputWeights_.size(); ++i) { inputWeights_[i] *= r; } } } Layer::Layer(NetworkConfig *netconfig, int numNeurons=0, int numInputsPerNeuron=0) : netconfig_(netconfig) , output_layer_(false) { nodes_.resize(numNeurons); for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i] = Neuron(numInputsPerNeuron); nodes_[i].idx_ = i; } outputs_.resize(numNeurons); } Layer::~Layer() { nodes_.clear(); } void 
Layer::init_weights(double mean, double variance, double magnitude) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].init_weights (mean, variance, magnitude); } } void Layer::forward_pass(const Layer &last_layer) { double rate = 1.0 - DROPOUT_RATE; const vector<double> & inputs = last_layer.get_outputs(); for (size_t i=0; i < nodes_.size(); ++i) { if (nodes_[i].droped_out_) { outputs_[i] = nodes_[i].value_ = 0; } else { nodes_[i].calculate_output_from (inputs); if (!output_layer_ && !netconfig_->training_) { nodes_[i].value_ *= rate; } outputs_[i] = nodes_[i].value_; } } } void Layer::calculate_output_layer_pdrv_of_error_wrt_input_weights(vector<double> const&inputs, vector<double> const&outputsExp, double totalExp, int right_ans ) { assert (outputsExp.size() == nodes_.size()); for (size_t i=0; i < nodes_.size(); ++i) { double drvLoss; if (i == right_ans) { drvLoss = outputsExp[i] / totalExp -1.0; } else { drvLoss = outputsExp[i] / totalExp; } nodes_[i].calculate_pdrv_of_error_wrt_input_weights (inputs, drvLoss); } } void Layer::calculate_middle_layer_pdrv_of_error_wrt_input_weights(const vector< double >& inputs, const Layer& output_layer) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].calculate_pdrv_of_error_wrt_input_weights (inputs, output_layer); } } void Layer::save_weights_to(vector< double >& weights) const { for (size_t i=0; i < nodes_.size(); ++i) { const vector<double> &w = nodes_[i].weights(); weights.insert(weights.end(), w.begin(), w.end()); } } size_t Layer::set_input_weights(const double* weights) { size_t pos = 0; for (size_t i=0; i < nodes_.size(); ++i) { pos += nodes_[i].set_input_weights(weights + pos); } return pos; } void Layer::set_outputs_of_input_layer(const vector< double >& value) { outputs_ = value; for (size_t i=0; i < nodes_.size(); ++i) { if (netconfig_->training_ && nodes_[i].droped_out_) { nodes_[i].value_ = outputs_[i] = 0; } else { nodes_[i].value_ = outputs_[i]; } } } const vector< double >& Layer::get_outputs() const { return outputs_; } size_t Layer::num_of_neurons() const { return nodes_.size(); } void Layer::clear_pdrvs() { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].clear_pdrvs(); } } void Layer::update_weights(double learning_rate, double momentum, bool reguon) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].update_weights(learning_rate, momentum, reguon); } } void Layer::mark_dropout(double dropout_rate) { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].droped_out_ = (g_rng.rand() < dropout_rate); } } void Layer::mark_no_dropout() { for (size_t i=0; i < nodes_.size(); ++i) { nodes_[i].droped_out_ = false; } } class NeuralNetwork { public: NeuralNetwork(int numInputs, vector<int> const& hiddenLayerTopology, int numOutpus); ~NeuralNetwork(); void init_weights(double mean_value, double variance_value, double magnitude); size_t number_of_weights () const; void run (vector<double> const &inputs, vector<double> &outputs); void train (vector<double> const &inputs, vector<double> &outputs); void forward_pass (vector<double> const &inputs); void backward_pass (vector<double> &outputs, int right_ans); void save_weights_to(vector<double> &weights) const; void set_weights(vector<double> const &weights); void clear_pdrvs (); void update_weights (double learning_rate, double momentum); double error () const { return last_error_; } bool regularization_on () const { return regularization_on_; } void set_regularization_on () { regularization_on_ = true; } protected: void feed_forward (); void mark_dropout (); void mark_no_dropout 
(); private: vector<Layer *> layers_; double last_error_; NetworkConfig *netconfig_; bool regularization_on_; NeuralNetwork(const NeuralNetwork &rhs) {} NeuralNetwork &operator=(const NeuralNetwork &rhs) { return *this; } }; NeuralNetwork::NeuralNetwork(int nInputs, const vector< int >& hiddenLayerTopology, int nOutputs) : last_error_(0) , netconfig_(new NetworkConfig) , regularization_on_(false) { layers_.resize(hiddenLayerTopology.size() + 2); // hidden + inputs + outputs int nNeuron = nInputs; layers_[0] = new Layer(netconfig_, nNeuron, 0); // layer zero is inputs for (size_t i=0; i < hiddenLayerTopology.size(); ++i) { layers_[i+1] = new Layer(netconfig_, hiddenLayerTopology[i], layers_[i]->num_of_neurons()); } nNeuron = nOutputs; layers_.back() = new Layer(netconfig_, nNeuron, hiddenLayerTopology.back()); } NeuralNetwork::~NeuralNetwork() { delete netconfig_; for (size_t i=0; i < layers_.size(); ++i) { delete layers_[i]; } layers_.clear(); } void NeuralNetwork::init_weights(double mean, double variance, double magnitude) { for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->init_weights(mean, variance, magnitude); } } size_t NeuralNetwork::number_of_weights() const { vector<double> weights; this->save_weights_to(weights); return weights.size(); } void NeuralNetwork::run(const vector< double >& inputs, vector< double >& outputs) { if (netconfig_->training_) { netconfig_->training_ = false; mark_no_dropout (); } forward_pass(inputs); outputs = layers_.back()->get_outputs(); } void NeuralNetwork::train(const vector< double >& inputs, vector< double >& outputs) { if (!netconfig_->training_) { netconfig_->training_ = true; mark_dropout (); } forward_pass(inputs); outputs = layers_.back()->get_outputs(); } void NeuralNetwork::forward_pass(const vector< double >& inputs) { // feed forward // set inputs as outputs of first layer layers_[0]->set_outputs_of_input_layer(inputs); feed_forward (); } void NeuralNetwork::backward_pass(vector< double >& outputs, int right_ans) { // calculate errors/loss double denom = 0; double numer = 0; last_error_ = 0; double max_out=-DBL_MAX; for (size_t i=0; i < outputs.size(); ++i) { if (outputs[i] > max_out) { max_out = outputs[i]; } } vector<double> outputsExp; outputsExp.reserve(outputs.size()); double totalExp = 0; for (size_t i=0; i < outputs.size(); ++i) { outputsExp.push_back(exp(outputs[i] - max_out)); if (i == right_ans) { numer = outputsExp.back(); } denom += outputsExp.back(); } totalExp = denom; last_error_ = -log(numer / denom); // calculate partial derivatives // first the last layer(output layer) vector< double > layer_inputs = layers_[layers_.size()-2]->get_outputs(); layers_.back()->calculate_output_layer_pdrv_of_error_wrt_input_weights(layer_inputs, outputsExp, totalExp, right_ans); // back propagate for (int i=layers_.size()-2; i > 0; --i) { vector< double > layer_inputs = layers_[i-1]->get_outputs(); layers_[i]->calculate_middle_layer_pdrv_of_error_wrt_input_weights(layer_inputs, *layers_[i+1]); } } void NeuralNetwork::feed_forward() { for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->forward_pass(*layers_[i-1]); } } void NeuralNetwork::mark_dropout() { for (size_t i=0; i < layers_.size()-1; ++i) { double r = DROPOUT_RATE; if (i==0) r = INPUT_DROPOUT_RATE; layers_[i]->mark_dropout(r); } } void NeuralNetwork::mark_no_dropout() { for (size_t i=0; i < layers_.size()-1; ++i) { layers_[i]->mark_no_dropout(); } } void NeuralNetwork::clear_pdrvs() { for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->clear_pdrvs(); } } void 
NeuralNetwork::update_weights(double learning_rate, double momentum) { assert (netconfig_->training_ && "why update weights when not training?"); // update weights for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->update_weights(learning_rate, momentum, regularization_on_); } mark_dropout(); clear_pdrvs(); } void NeuralNetwork::save_weights_to(vector< double >& weights) const { weights.clear(); for (size_t i=1; i < layers_.size(); ++i) { layers_[i]->save_weights_to (weights); } } void NeuralNetwork::set_weights(const vector< double >& weights) { int pos = 0; for (size_t i=1; i < layers_.size(); ++i) { pos += layers_[i]->set_input_weights (&weights[pos]); } } struct Genome { double fitness_; vector<double> genes_; Genome(size_t numGenes=0); void copy_genes_from (Genome const&source, size_t from_index, size_t to_index, size_t replace_start_index, double mutate_rate); void report () const; bool operator<(const Genome &rhs) const { return fitness_ > rhs.fitness_; } }; Genome::Genome(size_t numGenes) { assert (numGenes && "number of genes can't be zero"); genes_.resize(numGenes); } void Genome::copy_genes_from(const Genome& source, size_t from_index, size_t to_index, size_t replace_start_index, double mutate_rate) { for (size_t i=from_index,cgene=0; i < to_index; ++i,++cgene) { genes_[replace_start_index+cgene] = source.genes_[i]; } } void Genome::report() const { cout << "weights "; for (size_t i=0; i < genes_.size(); ++i) { cout << genes_[i] << " "; } } const int numInputPoints = 11; const int numOutputPoints = 4; struct Data { int value_; vector<double> inputs_; // binray representation int class_;// classify into 4 classes. 0 - no-change, 1 - fizz, 2 - buzz, 3 - fizzbuzz }; struct DataSet { vector<Data> samples_; void randomize () { vector<Data> sam; while (!samples_.empty()) { int idx = g_rng.randInt() % samples_.size(); sam.push_back(samples_[idx]); samples_[idx] = samples_.back(); samples_.pop_back(); } sam.swap(samples_); } }; DataSet sData; DataSet sDataTest; Data encode_binary (int value) { Data d; d.value_ = value; d.inputs_.reserve(numInputPoints); for (int i=0; i < numInputPoints; ++i) { int bin = (1 << i); if (value & bin) { d.inputs_.push_back(1.0); } else { d.inputs_.push_back(0); } } bool fizz = (value % 3 == 0); bool buzz = (value % 5 == 0); if (fizz && buzz) { d.class_ = 3; } else if (buzz) { d.class_ = 2; } else if (fizz) { d.class_ = 1; } else { d.class_ = 0; } return d; } void generate_data_set (DataSet &sData, int iFrom, int iTo) { for (int iC=iFrom; iC <= iTo; ++iC) { sData.samples_.push_back(encode_binary(iC)); } } int answer_from_outputs (vector<double> const&outputs) { double max_term=-DBL_MAX; int ans = -1; for (size_t i=0; i < outputs.size(); ++i) { if (outputs[i] > max_term) { max_term = outputs[i]; ans = i; } } return ans; } bool answer_correct (vector<double> const&outputs, int right_class) { return answer_from_outputs(outputs) == right_class; } int test_check (NeuralNetwork &network, bool print=false) { vector<double> outputs(numOutputPoints); cout << "test... 
"; if (sDataTest.samples_.empty()) generate_data_set (sDataTest, 1, 100); int correct_answer = 0; for (size_t i=0; i < sDataTest.samples_.size(); ++i) { const Data &d = sDataTest.samples_[i]; network.run(d.inputs_, outputs); int ans = answer_from_outputs(outputs); if (ans == d.class_) { ++correct_answer; } if (print) { if (ans == 0) { cout << d.value_ << " "; } else if (ans == 1) { cout << "fizz" << " "; } else if (ans == 2) { cout << "buzz" << " "; } else if (ans == 3) { cout << "fizzbuzz" << " "; } else { assert (0 && "what happened?"); } } } cout << "precision: " << (100.0 * double(correct_answer) / sDataTest.samples_.size()) << "%" << endl; return correct_answer; } int main() { int iSeed = time(NULL)%1000000; //iSeed = 587983; g_rng.seed(iSeed); cout << "use seed " << iSeed << endl; generate_data_set (sData, 101, 1000); sData.randomize(); vector<int> hidden_layers_topology; // hidden layers, from input to output hidden_layers_topology.push_back(700); /* hidden_layers_topology.push_back(100); hidden_layers_topology.push_back(100); //*/ NeuralNetwork network(numInputPoints, hidden_layers_topology, numOutputPoints); network.init_weights(0, 1.0, 1.0); const size_t numGenes = network.number_of_weights(); Genome entity(numGenes); network.save_weights_to(entity.genes_); vector<double> outputs(numOutputPoints); network.set_weights(entity.genes_); const double learning_rate = LEARNING_RATE; const double momentum = 0.9; const int batch_size = BATCH_SIZE; while (g_generation < 2000) { // cout << "generation " << g_generation << " "; ++g_generation; int run_count = 0; double max_error = 0; int correct_answer = 0; for (size_t i=0; i < sData.samples_.size(); ++i) { network.train(sData.samples_[i].inputs_, outputs); if (answer_correct(outputs, sData.samples_[i].class_)) { ++correct_answer; } network.backward_pass(outputs, sData.samples_[i].class_); max_error = network.error() > max_error ? network.error() : max_error; ++run_count; if (run_count >= batch_size) { network.update_weights(learning_rate, momentum); run_count = 0; } } if (run_count != 0) { network.update_weights(learning_rate, momentum); run_count = 0; } if (g_generation % 10 == 0) { cout << "generation " << g_generation << " "; int correct = test_check(network); if (!network.regularization_on () && double(correct_answer)/sData.samples_.size() - double(correct)/sDataTest.samples_.size() > 0.05 ) { network.set_regularization_on (); cout << "\t\t**** regularization on ****\n" << endl; } if (correct == sDataTest.samples_.size()) { cout << "done!\n"; break; } sData.randomize(); } } test_check(network, true); network.save_weights_to(entity.genes_); cout << "(" << entity.genes_.size() << ") weights\n"; double weights = 0; for (size_t i=0; i < entity.genes_.size(); ++i) { weights += fabs(entity.genes_[i]); } cout << "|totalWeights| = " << weights << "\n"; entity.report(); cout << "\n"; return 0; }
One thing I found interesting in this example is that a network with only one hidden layer outperforms one with multiple hidden layers (the "deep" one). We are tempted to think that a deep neural network is always better; the original blog author suggests trying a deep one at the end of his post, too. But is that true? The fact that a shallow network outperforms a deep one here suggests it is not necessarily so; it really depends on the problem at hand. What deep learning excels at is modularization. The typical example is image recognition, where a well-trained network exhibits a hierarchical structure: simple, basic features are learned in the early layers and more complex features in the later layers. A problem like fizz buzz, where modularization does not help, is not a good fit for a deep network. In fact, I bet a genetic algorithm would handle this problem more easily; a rough sketch of that idea follows, and please try it yourself if you are interested.
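For the curious, here is a minimal sketch of what a genetic-algorithm approach could look like, reusing the NeuralNetwork, Genome, DataSet (sData) and answer_correct() pieces from the fizz buzz listing above. The evaluate_fitness and ga_train helpers, the population size, and the mutation numbers are all my own assumptions, not part of the original code:

```cpp
// Hypothetical GA training loop for the fizz buzz network above.
// Fitness is simply the fraction of training samples classified correctly.
double evaluate_fitness (NeuralNetwork &network, Genome &g)
{
    network.set_weights(g.genes_);
    vector<double> outputs(numOutputPoints);
    int correct = 0;
    for (size_t i = 0; i < sData.samples_.size(); ++i) {
        network.run(sData.samples_[i].inputs_, outputs);
        if (answer_correct(outputs, sData.samples_[i].class_)) ++correct;
    }
    return g.fitness_ = double(correct) / sData.samples_.size();
}

void ga_train (NeuralNetwork &network)
{
    const int POPULATION = 50;        // assumption
    const int N_GENERATIONS = 200;    // assumption
    const size_t numGenes = network.number_of_weights();

    // random initial population
    vector<Genome> population(POPULATION, Genome(numGenes));
    for (int p = 0; p < POPULATION; ++p) {
        for (size_t i = 0; i < numGenes; ++i) {
            population[p].genes_[i] = g_rng.randNorm(0.0, 1.0);
        }
    }

    for (int gen = 0; gen < N_GENERATIONS; ++gen) {
        for (int p = 0; p < POPULATION; ++p) {
            evaluate_fitness(network, population[p]);
        }
        sort(population.begin(), population.end()); // Genome::operator< puts the fittest first

        // keep the top half, refill the bottom half with mutated copies of the top
        for (int p = POPULATION / 2; p < POPULATION; ++p) {
            const Genome &parent = population[p - POPULATION / 2];
            population[p].copy_genes_from(parent, 0, numGenes, 0, 0.0);
            for (size_t i = 0; i < numGenes; ++i) {
                if (g_rng.rand() < 0.05) {                       // mutation rate: assumption
                    population[p].genes_[i] += 0.1 * g_rng.randNorm(0.0, 1.0);
                }
            }
        }
    }
    network.set_weights(population[0].genes_);  // install the best individual found
}
```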
Here are some observations:
- These hyperparameters have a huge influence on training; you can't train a network with the "wrong" values. The values in the sample code were obtained by trial and error. I believe there are studies on how to choose better initial parameters; I guess learning them would be an important task if I were to make a living in machine learning.
- The paper that introduced dropout suggests a dropout rate of 0.5. Maybe there's an error in my implementation, but I can't train a network when the dropout rate is 0.5. Orz However, with a rate as low as the one in the sample code, the network trains and dropout does help. My guess is that a large dropout rate only helps on a huge network; with a small one you should lower it (an inverted-dropout variant is sketched after this list).
- The size of the network decides the magnitude of the learning rate: the bigger the network, the smaller the learning rate you should use.
- There are L2 regularization and L1 regularization. With L2 regularization you multiply the weight by a value slightly smaller than 1.0 when updating; with L1 regularization you move the weight toward zero by a fixed small amount. What you see in my code is L2 regularization. I find a regularization factor between 0.01 and 0.001 preferable, which means multiplying by roughly 0.99 to 0.999 on each update. Too big and your network will never train, because any change introduced by gradient descent gets cancelled out; too small and it only affects the network like noise, and the weights do not necessarily become smaller (both penalties are shown in the small SGD sketch after this list).
- Batch size has a big influence. Set it too small or too large and you will have difficulty training the network.
- So... is there a more scientific method to set these hyperparameters? Please enlighten me and leave a comment. Orz (A random-search sketch follows this list.)
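On the dropout point: my code applies dropout by scaling the kept activations by (1 - DROPOUT_RATE) at inference time (see Layer::forward_pass). The more common formulation nowadays is "inverted dropout", which instead scales the surviving activations by 1/(1 - p) during training and leaves inference untouched. A minimal sketch, written against the listings above; the function name is mine and it is not wired into the Layer class:

```cpp
// Inverted dropout: during training, zero each activation with probability p
// and scale the survivors by 1/(1-p); at test time use the activations unchanged,
// so no (1 - DROPOUT_RATE) factor is needed at inference. Sketch only.
void inverted_dropout (vector<double> &activations, double p, bool training)
{
    if (!training || p <= 0.0) return;            // inference path: nothing to do
    double scale = 1.0 / (1.0 - p);
    for (size_t i = 0; i < activations.size(); ++i) {
        activations[i] = (g_rng.rand() < p) ? 0.0 : activations[i] * scale;
    }
}
```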
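To make the L2 vs. L1 difference concrete, here is roughly what the two penalties look like inside a plain SGD update. This is a standalone sketch with a made-up function name, not the Adam update my Neuron::update_weights actually performs:

```cpp
// Plain SGD update showing the two regularization penalties.
// L2 (weight decay): shrink each weight by a small fraction of itself every step,
// which is the same as multiplying by a value slightly below 1.
// L1: move each weight toward zero by a constant amount every step.
void sgd_update (vector<double> &w, const vector<double> &grad,
                 double learning_rate, double lambda, bool use_l1)
{
    for (size_t i = 0; i < w.size(); ++i) {
        w[i] -= learning_rate * grad[i];                     // gradient step
        if (use_l1) {
            double step = learning_rate * lambda;
            if (w[i] >  step)      w[i] -= step;
            else if (w[i] < -step) w[i] += step;
            else                   w[i] = 0.0;               // don't overshoot past zero
        } else {
            w[i] *= (1.0 - learning_rate * lambda);          // L2 / weight decay
        }
    }
}
```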
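One commonly recommended approach is random search: sample each hyperparameter from a reasonable range (log-uniform for the learning rate), run a short training, and keep the best setting. In the sketch below, short_training_run() is a hypothetical helper that would rebuild the network with the given settings, train it for a few generations, and return the fraction of test samples answered correctly; it does not exist in the code above:

```cpp
// Random hyperparameter search (sketch). short_training_run() is hypothetical.
double short_training_run (double learning_rate, double dropout_rate, int batch_size);

void random_search (int trials)
{
    double best_precision = 0;
    for (int t = 0; t < trials; ++t) {
        // log-uniform sample for the learning rate: 1e-4 .. 1e-1
        double lr = pow(10.0, -4.0 + 3.0 * g_rng.rand());
        double dropout = 0.5 * g_rng.rand();             // 0 .. 0.5
        int batch = 1 << (2 + g_rng.randInt() % 5);      // 4, 8, 16, 32, or 64
        double precision = short_training_run(lr, dropout, batch);
        if (precision > best_precision) {
            best_precision = precision;
            cout << "new best: lr=" << lr << " dropout=" << dropout
                 << " batch=" << batch << " precision=" << precision << "\n";
        }
    }
}
```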