If you're interested in this topic, don't waste your time on this blog. What you should do right now is head over to
http://cs231n.github.io/
and start learning.
There are also courses you can watch on YouTube:
Stanford
National Taiwan University (台大)
I'm sure you'll soon know more than I do. (I'm really not an expert XD)
No school is better than real school!
Hmm, you're still here reading me?
Alright then. For the first post of this resumed series, let's learn about genetic algorithms.
What does this have to do with artificial intelligence?
Apparently, long ago, besides today's mainstream approach of finding a neural network's weights with gradient descent and backpropagation, people had another trick up their sleeve: genetic algorithms. It seems only a handful of people still research them nowadays.
So why bother with something this obscure? For one, I'm only doing this out of interest, so whether it's a hot topic doesn't really matter.
And besides, this method, inspired by biological evolution, genuinely works on certain problems, which is quite remarkable!
OK, a genetic algorithm is simple. It's just the following steps:
- Use random numbers to generate a population of creatures expressed in some encoding; we call that encoding the genome. (It can be bits, bytes, floats, etc.)
- Put these creatures into an environment and test them. At the end of a round, each one receives a score (e.g. how much sugar it ate, how far it ran, how long it survived).
- Breed the next generation, picking parents with probability proportional to their scores, so higher scorers are more likely to be chosen. For each pair picked, a random draw decides whether they exchange genes (crossover) or pass straight into the next round. While genes are being copied, mutations can occur, e.g. a bit flips from 0 to 1, or a random amount is added to or subtracted from a float value.
- Once enough individuals have been produced, go back to step 2 for the next round, and repeat until we find a solution.
You've probably noticed how many places above use random numbers. At its heart, a genetic algorithm is a systematic way of trying out combinations of candidate solutions; a purely random search might find the answer eventually, but your computer could grind away until the end of time...
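To make the selection step in the list above concrete, here is a minimal sketch of fitness-proportional ("roulette wheel") selection. The function name and the use of rand() are just for illustration; the full program further down does the same job with a prefix-sum list and a Mersenne Twister RNG.

// Minimal sketch of roulette-wheel selection: return an index with
// probability proportional to its fitness. Illustrative only.
#include <vector>
#include <cstdlib>

size_t select_parent(const std::vector<double> &fitness, double total_fitness)
{
    // spin the wheel: a random point in [0, total_fitness)
    double spin = (double)std::rand() / RAND_MAX * total_fitness;
    double accum = 0;
    for (size_t i = 0; i < fitness.size(); ++i) {
        accum += fitness[i];
        if (spin < accum)
            return i; // the spin landed inside individual i's slice
    }
    return fitness.size() - 1; // guard against rounding error
}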
Below I provide an example, for reference only; it can find a network that computes XOR.
But before that, let's first talk about what a neural network actually is.
A neural network is a network made up of many neurons, where each neuron's inputs connect to the outputs of several other neurons. Computation starts from the external inputs: each neuron sums up its inputs and passes the sum through an activation function to produce an output, and this continues until the final outputs are computed. The activation function might be sigmoid, tanh, relu, and so on, depending on what the network is for.
A network that can solve the XOR problem is shown in the video below.
The real key to the network is the bias: without a bias term, every neuron's decision boundary is forced through the origin, and the network can't shift its activations around to handle a problem like XOR. With biases, it can.
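To see what the bias buys us, here is one hand-made set of weights that solves XOR; it's the same solution left commented out (as sol) in the program below. One hidden neuron acts roughly as OR, the other as NAND, and the output neuron ANDs them together: OR AND NAND is exactly XOR. A tiny standalone check:

// Standalone check of a hand-made XOR solution (same weights as the
// commented-out 'sol' in the full program). Each neuron computes
// sigmoid(w1*x + w2*y + bias); the bias sets the neuron's threshold.
#include <cstdio>
#include <cmath>

static double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }

int main()
{
    for (int x = 0; x <= 1; ++x)
        for (int y = 0; y <= 1; ++y) {
            double h1 = sigmoid(20 * x + 20 * y - 10);    // ~ x OR y
            double h2 = sigmoid(-20 * x - 20 * y + 30);   // ~ x NAND y
            double out = sigmoid(20 * h1 + 20 * h2 - 30); // ~ h1 AND h2
            printf("%d XOR %d ~= %f\n", x, y, out);
        }
    return 0;
}

Note how each bias (-10, 30, -30) sets its neuron's threshold; with the biases removed, each decision boundary would have to pass through the origin and this construction falls apart.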
/*
 * This sample uses a GA to find a solution to the XOR problem.
 */
#include <vector>
#include <deque>
#include <algorithm>
#include <iostream>
#include <cmath>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include "MersenneTwister.h"

using namespace std;

#define MAX_INIT_WEIGHT 30.0
#define MAX_MUTATE_WEIGHT 15.0
#define MAX_FITNESS 1000000.0

const double crossover_rate = 0.8;
const double mutate_rate = 0.6;
const size_t numPopulation = 100;
const size_t max_generation = 100;
const double epsilon = 0.00001;

MTRand g_rng(time(NULL));

double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}

class Node // well, 'node' is shorter than 'neuron'...
{
public:
    Node(int numInputs);
    double run(vector<double> const &inputs);
    void getWeights(vector<double> &weights);
    size_t setWeights(const double *weights);
private:
    vector<double> weights_; // input weights plus a trailing bias
};

Node::Node(int numInputs)
{
    weights_.resize(numInputs + 1); // +1 for the bias
}

double Node::run(const vector<double> &inputs)
{
    assert(inputs.size() + 1 == weights_.size() && "number of inputs wrong");
    double v = 0;
    for (size_t i = 0; i < inputs.size(); ++i) {
        v += inputs[i] * weights_[i];
    }
    v += weights_.back(); // bias
    return sigmoid(v);
}

void Node::getWeights(vector<double> &weights)
{
    weights.insert(weights.end(), weights_.begin(), weights_.end());
}

size_t Node::setWeights(const double *weights)
{
    weights_.assign(weights, weights + weights_.size());
    return weights_.size();
}

class Layer
{
public:
    Layer(int numNodes, int numInputs);
    ~Layer();
    void run(vector<double> const &inputs, vector<double> &outputs);
    void getWeights(vector<double> &weights);
    size_t setWeights(const double *weights);
private:
    vector<Node *> nodes_;
    Layer(Layer const &rhs) {}                           // non-copyable
    Layer &operator=(Layer const &rhs) { return *this; } // non-assignable
};

Layer::Layer(int numNodes, int numInputs)
{
    nodes_.resize(numNodes);
    for (size_t i = 0; i < nodes_.size(); ++i) {
        nodes_[i] = new Node(numInputs);
    }
}

Layer::~Layer()
{
    for (size_t i = 0; i < nodes_.size(); ++i) {
        delete nodes_[i];
    }
    nodes_.clear();
}

void Layer::run(const vector<double> &inputs, vector<double> &outputs)
{
    outputs.resize(nodes_.size());
    for (size_t i = 0; i < nodes_.size(); ++i) {
        outputs[i] = nodes_[i]->run(inputs);
    }
}

void Layer::getWeights(vector<double> &weights)
{
    for (size_t i = 0; i < nodes_.size(); ++i) {
        nodes_[i]->getWeights(weights);
    }
}

size_t Layer::setWeights(const double *weights)
{
    size_t pos = 0;
    for (size_t i = 0; i < nodes_.size(); ++i) {
        pos += nodes_[i]->setWeights(weights + pos);
    }
    return pos;
}

class NeuralNetwork
{
public:
    NeuralNetwork(int numInputs, vector<int> const &hiddenLayerTopology, int numOutputs);
    ~NeuralNetwork();
    void run(vector<double> const &inputs, vector<double> &outputs);
    void getWeights(vector<double> &weights);
    void setWeights(vector<double> const &weights);
private:
    vector<Layer *> layers_;
    NeuralNetwork(const NeuralNetwork &rhs) {}                           // non-copyable
    NeuralNetwork &operator=(const NeuralNetwork &rhs) { return *this; } // non-assignable
};

NeuralNetwork::NeuralNetwork(int numInputs, const vector<int> &hiddenLayerTopology, int numOutputs)
{
    layers_.resize(hiddenLayerTopology.size() + 1);
    layers_[0] = new Layer(hiddenLayerTopology[0], numInputs);
    for (size_t i = 1; i < layers_.size() - 1; ++i) {
        layers_[i] = new Layer(hiddenLayerTopology[i], hiddenLayerTopology[i - 1]);
    }
    layers_.back() = new Layer(numOutputs, hiddenLayerTopology.back());
}

NeuralNetwork::~NeuralNetwork()
{
    for (size_t i = 0; i < layers_.size(); ++i) {
        delete layers_[i];
    }
    layers_.clear();
}

void NeuralNetwork::run(const vector<double> &inputs, vector<double> &outputs)
{
    // feed each layer's outputs forward as the next layer's inputs
    vector<double> linputs, loutputs;
    linputs = inputs;
    for (size_t i = 0; i < layers_.size(); ++i) {
        layers_[i]->run(linputs, loutputs);
        linputs = loutputs;
    }
    assert(outputs.size() == linputs.size());
    outputs.swap(linputs);
}

void NeuralNetwork::getWeights(vector<double> &weights)
{
    weights.clear();
    for (size_t i = 0; i < layers_.size(); ++i) {
        layers_[i]->getWeights(weights);
    }
}

void NeuralNetwork::setWeights(const vector<double> &weights)
{
    int pos = 0;
    for (size_t i = 0; i < layers_.size(); ++i) {
        pos += layers_[i]->setWeights(&weights[pos]);
    }
}

struct Genome
{
    double fitness_;
    vector<double> genes_;

    Genome(size_t numGenes = 0);
    void init(size_t numGenes);
    void copy_genes_from(Genome const &source, size_t from_index, size_t to_index,
                         size_t replace_start_index, double mutate_rate);
    void report() const;
    bool operator<(const Genome &rhs) const { return fitness_ > rhs.fitness_; }
};

Genome::Genome(size_t numGenes)
    : fitness_(0)
{
    if (numGenes) {
        genes_.resize(numGenes);
    }
}

void Genome::init(size_t numGenes)
{
    genes_.resize(numGenes);
    for (size_t i = 0; i < numGenes; ++i) {
        genes_[i] = (g_rng.rand() - 0.5) * 2 * MAX_INIT_WEIGHT;
    }
}

void Genome::copy_genes_from(const Genome &source, size_t from_index, size_t to_index,
                             size_t replace_start_index, double mutate_rate)
{
    for (size_t i = from_index, cgene = 0; i < to_index; ++i, ++cgene) {
        genes_[replace_start_index + cgene] = source.genes_[i];
        if (g_rng.rand() < mutate_rate) {
            // mutation: nudge the copied weight by a random amount
            genes_[replace_start_index + cgene] += (g_rng.rand() - 0.5) * 2 * MAX_MUTATE_WEIGHT;
        }
    }
}

void Genome::report() const
{
    cout << "weights ";
    for (size_t i = 0; i < genes_.size(); ++i) {
        cout << genes_[i] << " ";
    }
}

void spawn_new_generation(vector<Genome> &population)
{
    assert(!population.empty());
    double total_fitness = 0;
    const size_t numPopulation = population.size();
    const size_t numGenes = population[0].genes_.size();
    for (size_t i = 0; i < numPopulation; ++i) {
        total_fitness += population[i].fitness_;
    }

    // make fitness order list
    deque<double> fitness_list(numPopulation + 1);
    // normalize fitness, put in an ordered list for search later
    for (size_t p = 0; p < numPopulation; ++p) {
        population[p].fitness_ /= total_fitness;
        fitness_list[p + 1] = population[p].fitness_ + fitness_list[p];
    }

    // new population by crossover, mutation
    vector<Genome> new_population;
    new_population.reserve(numPopulation);
    while (new_population.size() < numPopulation) {
        // roulette-wheel selection: binary-search the prefix sums with
        // a random point, so fitter individuals are picked more often
        int lidx = upper_bound(fitness_list.begin(), fitness_list.end(),
                               g_rng.rand()) - fitness_list.begin();
        --lidx;
        int ridx = upper_bound(fitness_list.begin(), fitness_list.end(),
                               g_rng.rand()) - fitness_list.begin();
        --ridx;
        Genome a(numGenes);
        Genome b(numGenes);
        if (g_rng.rand() < crossover_rate) {
            // switch point at begin or end doesn't make sense.
            size_t switch_point = g_rng.randInt() % (numGenes - 4);
            switch_point += 4;
            a.copy_genes_from(population[lidx], 0, switch_point, 0, mutate_rate);
            a.copy_genes_from(population[ridx], switch_point, numGenes, switch_point, mutate_rate);
            b.copy_genes_from(population[ridx], 0, switch_point, 0, mutate_rate);
            b.copy_genes_from(population[lidx], switch_point, numGenes, switch_point, mutate_rate);
        } else {
            a.copy_genes_from(population[lidx], 0, numGenes, 0, mutate_rate);
            b.copy_genes_from(population[ridx], 0, numGenes, 0, mutate_rate);
        }
        new_population.push_back(a);
        new_population.push_back(b);
    }
    new_population.swap(population);
}

double calculate_fitness(vector<double> const &results, vector<double> const &expects)
{
    assert(results.size() == expects.size());
    double delta = 0;
    for (size_t i = 0; i < results.size(); ++i) {
        delta += fabs(results[i] - expects[i]);
    }
    double fitness;
    if (delta <= epsilon) {
        fitness = MAX_FITNESS;
    } else {
        // reward small errors heavily: fitness = 1 / delta^4
        fitness = 1.0 / (delta * delta);
        fitness *= fitness;
    }
    cout << "results ";
    for (size_t i = 0; i < results.size(); ++i) {
        cout << results[i] << ", ";
    }
    cout << " fitness " << fitness;
    cout << endl;
    return fitness;
}

int main()
{
    // XOR test, two inputs, one output
    vector<int> topo; // hidden layers
    topo.push_back(2);
    NeuralNetwork network(2, topo, 1);

    vector<double> weights;
    network.getWeights(weights);
    const size_t numGenes = weights.size();

    /* a known hand-made solution, kept for testing:
    vector<double> sol;
    sol.push_back(20);  sol.push_back(20);  sol.push_back(-10);
    sol.push_back(-20); sol.push_back(-20); sol.push_back(30);
    sol.push_back(20);  sol.push_back(20);  sol.push_back(-30);
    //*/

    vector<Genome> population(numPopulation);
    for (size_t i = 0; i < population.size(); ++i) {
        population[i].init(numGenes);
        //population[i].genes_ = sol;
    }

    bool found_solution = false;
    for (size_t generation = 0; !found_solution && generation < max_generation; ++generation) {
        cout << "generation " << generation << endl;
        vector<double> inputs(2);
        vector<double> outputs(1);
        vector<double> results(4);
        vector<double> expects(4);
        expects[0] = 0; expects[1] = 0; expects[2] = 1; expects[3] = 1;
        for (size_t i = 0; i < population.size(); ++i) {
            // evaluate this genome on all four XOR cases
            network.setWeights(population[i].genes_);
            inputs[0] = 0; inputs[1] = 0;
            network.run(inputs, outputs);
            results[0] = outputs[0];
            inputs[0] = 1; inputs[1] = 1;
            network.run(inputs, outputs);
            results[1] = outputs[0];
            inputs[0] = 0; inputs[1] = 1;
            network.run(inputs, outputs);
            results[2] = outputs[0];
            inputs[0] = 1; inputs[1] = 0;
            network.run(inputs, outputs);
            results[3] = outputs[0];
            population[i].fitness_ = calculate_fitness(results, expects);
            if (population[i].fitness_ == MAX_FITNESS) {
                found_solution = true;
                cout << "*** found solution ";
                population[i].report();
                cout << " ***\n";
                cout << "at generation " << generation << endl;
                ::exit(0);
            }
        }
        spawn_new_generation(population);
    }
    return 0;
}
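A quick note if you want to run this yourself: the MTRand class comes from the single-header MersenneTwister.h that used to circulate widely (Richard J. Wagner's implementation, to my knowledge); drop that header next to the source file and a plain g++ build should do. The source file name here is my own choice:

g++ -O2 ga_xor.cpp -o ga_xor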
With 100 individuals per generation, as in my example, a solution usually turns up within 3 to 5 generations.
When I first got it working it really felt magical. It actually works!
OK, that's it for this post. See you next time!