C++ 和 Java 实现的一棵纯 MCTS(蒙特卡洛树搜索)
模拟 1000 次后,在根节点下面选出一个最好的子节点。
(CLion 运行 C++ 时要把其他无关文件注释掉,不然运行 main 程序会报错,大概是因为引用到了其他文件中同名的函数。)C++ 实现:
//
// on 2017/11/19.
//
#include <assert.h>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <stack>
using namespace std;
// Small constant added to visit counts so that unvisited nodes never cause
// a division by zero in the value estimates below.
const double EPSILON = 1e-6;

/**
 * Node of a pure Monte-Carlo tree search (MCTS) using UCT child selection.
 *
 * Every expanded node has exactly `childNum` children. The roll-out is a
 * placeholder that returns a random 0/1 outcome, so the tree demonstrates
 * the search mechanics rather than playing a real game.
 *
 * Each node owns its children: they are allocated in expand() / the copy
 * constructor and released in the destructor.
 */
class UCTreeNode {
private:
    static const int childNum = 5;        // fixed branching factor
    UCTreeNode *vpChildren_i[childNum];   // owned children; all null while leaf
    bool isLeaf_i = true;                 // true until expand() has run
    double nVisits_i = 0;                 // number of visits through this node
    double totValue_i = 0;                // sum of roll-out values seen here

    /**
     * Picks the child with the highest UCT score:
     * mean value + sqrt(log(parentVisits + 1) / childVisits).
     * Ties go to the highest index because the comparison is `>=`.
     *
     * @return index of the selected child in vpChildren_i.
     */
    int selectAction() {
        assert(!isLeaf_i);  // only expanded nodes have children to choose from
        int selected = 0;
        double bestValue = -std::numeric_limits<double>::max();
        for (int k = 0; k < childNum; ++k) {
            UCTreeNode *pCur = vpChildren_i[k];
            const double visits = pCur->nVisits_i + EPSILON;
            const double uctValue = pCur->totValue_i / visits +
                                    std::sqrt(std::log(nVisits_i + 1) / visits);
            if (uctValue >= bestValue) {
                selected = k;
                bestValue = uctValue;
            }
        }
        return selected;
    }

    /** Allocates the children of a leaf node; does nothing if already expanded. */
    void expand() {
        if (!isLeaf_i)
            return;
        for (int k = 0; k < childNum; ++k)
            vpChildren_i[k] = new UCTreeNode();
        isLeaf_i = false;
    }

    /** Placeholder simulation: returns a random outcome, 0 or 1. */
    int rollOut() {
        return std::rand() % 2;
    }

    /** Records one visit and adds `value` to the accumulated total. */
    void updateStats(int value) {
        nVisits_i++;
        totValue_i += value;
    }

public:
    /** Creates an unvisited leaf node with no children. */
    UCTreeNode() {
        for (int k = 0; k < childNum; ++k)
            vpChildren_i[k] = nullptr;
    }

    /**
     * Deep-copies `tree`: statistics, leaf flag and (recursively) all children.
     * The flag and statistics are taken from `tree`, not from this object's
     * defaults, so expanded trees are actually copied.
     */
    UCTreeNode(const UCTreeNode &tree)
        : isLeaf_i(tree.isLeaf_i),
          nVisits_i(tree.nVisits_i),
          totValue_i(tree.totValue_i) {
        for (int k = 0; k < childNum; ++k)
            vpChildren_i[k] = nullptr;
        if (isLeaf_i)
            return;
        for (int k = 0; k < childNum; ++k) {
            assert(nullptr != tree.vpChildren_i[k]);
            vpChildren_i[k] = new UCTreeNode(*tree.vpChildren_i[k]);
        }
    }

    // Assignment is disabled: the implicit version would copy the raw child
    // pointers and lead to a double delete in the destructor.
    UCTreeNode &operator=(const UCTreeNode &) = delete;

    /** Releases the whole subtree below this node. */
    ~UCTreeNode() {
        for (int k = 0; k < childNum; ++k)
            delete vpChildren_i[k];  // deleting a null pointer is a no-op
    }

    /** @return true while this node has not been expanded. */
    bool isLeaf() const {
        return isLeaf_i;
    }

    /**
     * Runs one MCTS iteration starting at this node: descend by UCT to a
     * leaf, expand it, step into one of the new children, perform a
     * roll-out, then update (and print) every node on the visited path.
     */
    void iterate() {
        std::stack<UCTreeNode *> visited;
        UCTreeNode *pCur = this;
        visited.push(pCur);

        int action = 0;
        while (!pCur->isLeaf()) {
            action = pCur->selectAction();
            pCur = pCur->vpChildren_i[action];
            visited.push(pCur);
        }

        pCur->expand();
        action = pCur->selectAction();
        pCur = pCur->vpChildren_i[action];
        visited.push(pCur);

        const int value = rollOut();

        // Back-propagate from the newest node up to the root.
        while (!visited.empty()) {
            pCur = visited.top();
            visited.pop();
            pCur->updateStats(value);
            pCur->Value();
        }
    }

    /**
     * Chooses the child with the highest mean value (no exploration term).
     * A tiny random jitter (at most EPSILON) breaks ties between children.
     * Must only be called on an expanded node.
     *
     * @return index of the best child.
     */
    int bestAction() {
        int selected = 0;
        double bestValue = -std::numeric_limits<double>::max();
        for (int k = 0; k < childNum; ++k) {
            UCTreeNode *pCur = vpChildren_i[k];
            assert(nullptr != pCur);
            double expValue = pCur->totValue_i / (pCur->nVisits_i + EPSILON);
            expValue += static_cast<double>(std::rand()) * EPSILON / RAND_MAX;
            if (expValue >= bestValue) {
                selected = k;
                bestValue = expValue;
            }
        }
        return selected;
    }

    /**
     * Prints this node's statistics as "totalValue/visits" followed by a newline.
     * NOTE(review): the name is kept exactly as found; it may have been
     * truncated from something like printValue() — confirm.
     */
    void Value() const {
        std::cout << totValue_i << "/" << nVisits_i << std::endl;
    }
};
/**
 * Demo driver: runs 1000 MCTS iterations from a fresh root, then prints
 * the index of the root child with the best mean value.
 */
int main() {
    UCTreeNode tree;
    for (int k = 0; k < 1000; ++k) {
        tree.iterate();          // prints the statistics of each node on the path
        std::cout << std::endl;  // blank line between iterations
    }
    std::cout << std::endl;
    int bestAction = tree.bestAction();
    std::cout << "Best Action: " << bestAction << std::endl;
    return 0;
}
Java 实现:
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
/**
 * Node of a pure Monte-Carlo tree search (MCTS) using UCT child selection.
 *
 * <p>Every expanded node has {@link #nActions} children. The roll-out is a
 * placeholder that returns a random 0/1 result.
 *
 * <p>NOTE(review): the method names {@code lect} and {@code lectAction} are kept
 * exactly as found because callers use them; they look truncated from
 * {@code select} / {@code selectAction} — confirm before renaming.
 */
public class TreeNode {
    static Random r = new Random();
    /** Number of actions, i.e. children created per expansion. */
    static int nActions = 5;
    /** Small constant that avoids division by zero for unvisited children. */
    static double epsilon = 1e-6;

    /** Children of this node; {@code null} until {@link #expand()} is called. */
    TreeNode[] children;
    /** Visit count and accumulated roll-out value for this node. */
    int nVisits, totValue;

    /**
     * Reports whether this node has no children yet.
     *
     * @return {@code true} if {@link #expand()} has not been called on this node
     */
    public boolean isLeaf() {
        return children == null;
    }

    /**
     * Returns the child with the highest UCT score: mean value plus
     * {@code sqrt(log(parentVisits + 1) / childVisits)}, plus a tiny random
     * term that breaks ties between equally scored children.
     *
     * @return the selected child
     * @throws IllegalStateException if this node has not been expanded
     */
    public TreeNode lect() {
        if (children == null) {
            throw new IllegalStateException("cannot select a child of an unexpanded node");
        }
        TreeNode selected = null;
        // Start below every possible score so the first child is always accepted.
        double bestValue = Double.NEGATIVE_INFINITY;
        for (TreeNode c : children) {
            double visits = c.nVisits + epsilon;
            double uctValue = c.totValue / visits
                    + Math.sqrt(Math.log(nVisits + 1) / visits)
                    + r.nextDouble() * epsilon;
            if (uctValue > bestValue) {
                selected = c;
                bestValue = uctValue;
            }
        }
        return selected;
    }

    /** Creates {@link #nActions} fresh children for this node. */
    public void expand() {
        children = new TreeNode[nActions];
        for (int i = 0; i < nActions; i++) {
            // Array slots start out null, so each one needs its own node.
            children[i] = new TreeNode();
        }
    }

    /**
     * Runs one MCTS iteration from this node: descend by UCT to a leaf,
     * expand it, pick one of the new children, roll out, and update every
     * node on the visited path. Progress is printed to standard output.
     */
    public void lectAction() {
        List<TreeNode> visited = new LinkedList<>(); // nodes on the search path
        TreeNode cur = this;
        System.out.print("当前结点为:" + cur.totValue + "/" + cur.nVisits + " \n ");
        visited.add(this);
        while (!cur.isLeaf()) {
            cur = cur.lect(); // follow the child with the highest UCT score
            visited.add(cur);
            System.out.print("下⼀级结点是" + cur.totValue + "/" + cur.nVisits + " ");
        }
        System.out.print("\n");
        // The leaf must be expanded first; otherwise it has no children to select.
        cur.expand();
        TreeNode newNode = cur.lect();
        visited.add(newNode);
        int value = rollOut();
        // Games with more than one player would need different update logic.
        for (TreeNode node : visited) {
            node.updateState(value);
        }
    }

    /**
     * Placeholder simulation of the game result.
     *
     * @return a random value in {@code [0, 2)}, i.e. 0 or 1
     */
    public int rollOut() {
        return r.nextInt(2);
    }

    /**
     * Records one visit and adds {@code value} to the accumulated total.
     *
     * @param value roll-out result to accumulate (narrowed to {@code int})
     */
    public void updateState(double value) {
        nVisits++;
        totValue += value;
    }

    /**
     * Returns the number of children.
     *
     * @return 0 for an unexpanded node, otherwise the length of {@link #children}
     */
    public int arity() {
        return children == null ? 0 : children.length;
    }
}
/**
 * Demo driver: runs 1000 MCTS iterations from a fresh root and prints the
 * statistics of the child that UCT selection picks at the root.
 */
class m {
    public static void main(String[] args) {
        TreeNode tree = new TreeNode();
        for (int n = 0; n < 1000; n++) {
            tree.lectAction();
        }
        // Select once: selection has a random tie-break, so two separate calls
        // could return different children and print mismatched numbers.
        TreeNode best = tree.lect();
        System.out.println(best.totValue + "/" + best.nVisits);
    }
}