Fix TSP and replace the probability-based split measure with entropy

This commit is contained in:
Hubert Sokołowski
2021-05-16 23:21:24 +02:00
parent 8e4cfb3001
commit d2b728d248
6 changed files with 262 additions and 131 deletions

View File

@@ -15,15 +15,9 @@
/**
* @param {DecisionTreeBuilder} _builder
* @param {boolean} isChanged
*/
//TSP
function buildDecisionTreeMix(
_builder,
isChanged = false,
changedAttribute1 = null,
changedAttribute2 = null
) {
function buildDecisionTreeMix(_builder) {
//debugger;
const builder = { ..._builder };
const {
@@ -63,8 +57,7 @@ function buildDecisionTreeMix(
// LEAF
var initialEntropy = entropy(trainingSet, categoryAttr);
if (initialEntropy <= entropyThrehold && !isChanged) {
console.log('initialEntropy ' + initialEntropy + '<=' + entropyThrehold + ' entropyThrehold');
if (initialEntropy <= entropyThrehold) {
let _category = mostFrequentValue(trainingSet, categoryAttr);
let _positiveCounter = 0;
for (let element of trainingSet) {
@@ -96,10 +89,11 @@ function buildDecisionTreeMix(
var lowest;
var tmp;
var min = 1000;
var min = 0;
for (var alg of arrayOfTests) {
tmp = alg.maxDif;
if (tmp < min) {
//console.log(tmp);
if (tmp > min) {
lowest = alg;
min = tmp;
}
@@ -220,9 +214,7 @@ context.onmessage = function (event) {
};
function TSPDif(allClasses, attributes, trainingSet, categoryAttr) {
var right = 0,
left = 0;
var maxDif = 100;
var maxDif = 0;
var direction = '<';
/** @type {string | number} */ var attribute1 = -1;
/** @type {string | number} */ var attribute2 = -1;
@@ -231,11 +223,14 @@ function TSPDif(allClasses, attributes, trainingSet, categoryAttr) {
classMatrix = [new Array(allClasses.length).fill(0), new Array(allClasses.length).fill(0)],
match = [],
notMatch = [];
var initialEntropy = entropy(trainingSet, categoryAttr);
for (let attr1 of attributes) {
for (let attr2 of attributes) {
let attr1, attr2;
for (let i = 0; i < attributes.length; i++) {
attr1 = attributes[i];
for (let j = i + 1; j < attributes.length; j++) {
attr2 = attributes[j];
if (attr1 !== attr2) {
right = left = 0;
leftList = [];
rightList = [];
classMatrix = [new Array(allClasses.length).fill(0), new Array(allClasses.length).fill(0)];
@@ -245,33 +240,41 @@ function TSPDif(allClasses, attributes, trainingSet, categoryAttr) {
const attribute = element[categoryAttr];
if (element[attr1] < element[attr2]) {
left++;
leftList.push(element);
classMatrix[0][allClasses.indexOf(attribute)]++;
} else {
right++;
rightList.push(element);
classMatrix[1][allClasses.indexOf(attribute)]++;
}
}
// probability
var probR = 0,
probL = 0,
rankL = 0,
rankR = 0;
for (let k = 0; k < allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
// var probR = 0,
// probL = 0,
// rankL = 0,
// rankR = 0;
// for (let k = 0; k < allClasses.length; k++) {
// probL = left === 0 ? 0 : classMatrix[0][k] / left;
// probR = right === 0 ? 0 : classMatrix[1][k] / right;
rankL += probL * probL;
rankR += probR * probR;
}
// rankL += probL * probL;
// rankR += probR * probR;
// }
// setting new values
var currentDif =
(right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
if (currentDif < maxDif) {
// // setting new values
// var currentDif =
// (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
if (currentDif > maxDif) {
maxDif = currentDif;
attribute1 = attr1;
attribute2 = attr2;
@@ -292,7 +295,7 @@ function TSPWDif(allClasses, attributes, trainingSet, categoryAttr) {
L_weight = 0,
weight = 0,
direction = '<';
var maxDif = 100;
var maxDif = 0;
/** @type {string | number} */ var attribute1 = -1;
/** @type {string | number} */ var attribute2 = -1;
var leftList = [],
@@ -300,9 +303,13 @@ function TSPWDif(allClasses, attributes, trainingSet, categoryAttr) {
classMatrix = [new Array(allClasses.length).fill(0), new Array(allClasses.length).fill(0)],
match = [],
notMatch = [];
var initialEntropy = entropy(trainingSet, categoryAttr);
for (let attr1 of attributes) {
for (let attr2 of attributes) {
let attr1, attr2;
for (let i = 0; i < attributes.length; i++) {
attr1 = attributes[i];
for (let j = i + 1; j < attributes.length; j++) {
attr2 = attributes[j];
if (attr1 !== attr2) {
right = left = sum1 = sum2 = weight = 0;
leftList = [];
@@ -333,22 +340,17 @@ function TSPWDif(allClasses, attributes, trainingSet, categoryAttr) {
classMatrix[1][allClasses.indexOf(attribute)]++;
}
}
var probR = 0,
probL = 0,
rankL = 0,
rankR = 0;
for (let k = 0; k < allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
rankL += probL * probL;
rankR += probR * probR;
}
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
var currentDif =
(right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
if (currentDif < maxDif) {
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
if (currentDif > maxDif) {
maxDif = currentDif;
attribute1 = attr1;
attribute2 = attr2;

View File

@@ -56,7 +56,7 @@ function buildDecisionTreeTSP(
var right = 0,
left = 0;
var maxDif = 100,
var maxDif = 0,
currentDif;
/** @type {string | number} */ var attribute1 = -1;
/** @type {string | number} */ var attribute2 = -1;
@@ -94,28 +94,44 @@ function buildDecisionTreeTSP(
}
}
// probability
probR = 0;
probL = 0;
rankL = 0;
rankR = 0;
for (let k = 0; k < builder.allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
// // probability
// probR = 0;
// probL = 0;
// rankL = 0;
// rankR = 0;
// for (let k = 0; k < builder.allClasses.length; k++) {
// probL = left === 0 ? 0 : classMatrix[0][k] / left;
// probR = right === 0 ? 0 : classMatrix[1][k] / right;
rankL += probL * probL;
rankR += probR * probR;
}
// rankL += probL * probL;
// rankR += probR * probR;
// }
// setting new values
currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// // setting new values
// currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// maxDif = currentDif;
// attribute1 = changedAttribute1;
// attribute2 = changedAttribute2;
// match = leftList;
// notMatch = rightList;
//podzial = classMatrix;
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
maxDif = currentDif;
attribute1 = changedAttribute1;
attribute2 = changedAttribute2;
match = leftList;
notMatch = rightList;
//podzial = classMatrix;
isChanged = false;
} else if (isUpdate && !isChanged) {
@@ -165,34 +181,52 @@ function buildDecisionTreeTSP(
}
}
// probability
probR = 0;
probL = 0;
rankL = 0;
rankR = 0;
for (let k = 0; k < builder.allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
// // probability
// probR = 0;
// probL = 0;
// rankL = 0;
// rankR = 0;
// for (let k = 0; k < builder.allClasses.length; k++) {
// probL = left === 0 ? 0 : classMatrix[0][k] / left;
// probR = right === 0 ? 0 : classMatrix[1][k] / right;
rankL += probL * probL;
rankR += probR * probR;
}
// rankL += probL * probL;
// rankR += probR * probR;
// }
// setting new values
currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// // setting new values
// currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// maxDif = currentDif;
// attribute1 = changedAttribute1;
// attribute2 = changedAttribute2;
// match = leftList;
// notMatch = rightList;
//podzial = classMatrix;
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
maxDif = currentDif;
attribute1 = changedAttribute1;
attribute2 = changedAttribute2;
attribute1 = oldTree.attr2;
attribute2 = oldTree.pivot;
match = leftList;
notMatch = rightList;
//podzial = classMatrix;
}
} else {
let attr1, attr2;
for (let i = 0; i < attributes.length; i++) {
attr1 = attributes[i];
for (let j = i + 1; j < attributes.length; j++) {
attr2 = attributes[j];
if (attr1 !== attr2) {
right = left = 0;
leftList = [];
@@ -217,28 +251,45 @@ function buildDecisionTreeTSP(
}
}
// probability
probR = 0;
probL = 0;
rankL = 0;
rankR = 0;
for (let k = 0; k < builder.allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
// // probability
// probR = 0;
// probL = 0;
// rankL = 0;
// rankR = 0;
// for (let k = 0; k < builder.allClasses.length; k++) {
// probL = left === 0 ? 0 : classMatrix[0][k] / left;
// probR = right === 0 ? 0 : classMatrix[1][k] / right;
rankL += probL * probL;
rankR += probR * probR;
}
// rankL += probL * probL;
// rankR += probR * probR;
// }
// setting new values
currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
if (currentDif < maxDif) {
// // setting new values
// currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// if (currentDif < maxDif) {
// maxDif = currentDif;
// attribute1 = attr1;
// attribute2 = attr2;
// match = leftList;
// notMatch = rightList;
// //podzial = classMatrix;
// }
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
if (currentDif > maxDif) {
maxDif = currentDif;
attribute1 = attr1;
attribute2 = attr2;
match = leftList;
notMatch = rightList;
//podzial = classMatrix;
}
}
}

View File

@@ -87,7 +87,7 @@ function buildDecisionTreeTSPW(
sum1 = 0,
sum2 = 0,
L_weight = 0;
var maxDif = 100;
var maxDif = 0;
/** @type {string | number} */ var attribute1 = -1;
/** @type {string | number} */ var attribute2 = -1;
var directrion = '<';
@@ -124,22 +124,38 @@ function buildDecisionTreeTSPW(
}
}
// probability
probR = 0;
probL = 0;
rankL = 0;
rankR = 0;
for (let k = 0; k < builder.allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
// // probability
// probR = 0;
// probL = 0;
// rankL = 0;
// rankR = 0;
// for (let k = 0; k < builder.allClasses.length; k++) {
// probL = left === 0 ? 0 : classMatrix[0][k] / left;
// probR = right === 0 ? 0 : classMatrix[1][k] / right;
rankL += probL * probL;
rankR += probR * probR;
}
// rankL += probL * probL;
// rankR += probR * probR;
// }
// setting new values
currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// // setting new values
// currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
// maxDif = currentDif;
// attribute1 = changedAttribute1;
// attribute2 = changedAttribute2;
// match = leftList;
// notMatch = rightList;
// L_weight = weight;
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
maxDif = currentDif;
attribute1 = changedAttribute1;
attribute2 = changedAttribute2;
@@ -191,21 +207,16 @@ function buildDecisionTreeTSPW(
}
}
probR = 0;
probL = 0;
rankL = 0;
rankR = 0;
for (let k = 0; k < builder.allClasses.length; k++) {
probL = left === 0 ? 0 : classMatrix[0][k] / left;
probR = right === 0 ? 0 : classMatrix[1][k] / right;
let matchEntropy = entropy(rightList, categoryAttr);
let notMatchEntropy = entropy(leftList, categoryAttr);
rankL += probL * probL;
rankR += probR * probR;
}
currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL);
if (currentDif < maxDif) {
// calculating informational gain
let newEntropy = 0;
newEntropy += matchEntropy * rightList.length;
newEntropy += notMatchEntropy * leftList.length;
newEntropy /= trainingSet.length;
let currentDif = initialEntropy - newEntropy;
if (currentDif > maxDif) {
maxDif = currentDif;
attribute1 = attr1;
attribute2 = attr2;
@@ -259,7 +270,7 @@ function buildDecisionTreeTSPW(
trainingSet2: trainingSet,
};
}
console.log('-----------Podział-----------');
//console.log('-----------Podział-----------');
builder.maxTreeDepth = maxTreeDepth - 1;
builder.trainingSet = match;
var matchSubTree = buildDecisionTreeTSPW(builder);

View File

@@ -20,7 +20,7 @@ import { executeAlgorithm } from '../utils/algorithm-executor';
import TestSetFileReader from './TestSetFileReader';
import ConfusionMatrix from './ConfusionMatrix';
import { getSizeTree } from '../utils/size-checker';
import { testTree } from '../services/playground3';
import { rebuildTestTree } from '../utils/RebuilderTestTree';
/**
* @typedef {import('../utils/decision-tree.js').DecisionTreeBuilder} DecisionTreeBuilder
@@ -95,11 +95,11 @@ const Tree = ({ options }) => {
console.log(options.categoryAttr);
let tmpRoot = JSON.parse(JSON.stringify(newRoot));
if (testSet == null) {
testTree(tmpRoot, options.trainingSet, options.categoryAttr);
rebuildTestTree(tmpRoot, options.trainingSet, options.categoryAttr);
console.log(tmpRoot);
setSecondRoot(tmpRoot);
} else {
testTree(tmpRoot, testSet, options.categoryAttr);
rebuildTestTree(tmpRoot, testSet, options.categoryAttr);
console.log(tmpRoot);
setSecondRoot(tmpRoot);
}

View File

@@ -32,13 +32,13 @@ export function testTree(tree, newData, categoryAttr) {
let match;
if (tree.predicateName === '==' || tree.predicateName === '>=') {
match = predicate(x[tree.attr2], tree.pivot);
console.log('c45', match);
//console.log('c45', match);
} else if (tree.weight) {
match = predicate(x[tree.attr2], x[tree.pivot], tree.weight);
console.log('tspw', match);
//console.log('tspw', match);
} else {
match = predicate(x[tree.attr2], x[tree.pivot]);
console.log('tsp', match);
//console.log('tsp', match);
}
match ? matchedData.push(x) : notMatchedData.push(x);

View File

@@ -0,0 +1,67 @@
/**
 * Re-evaluates an already built decision tree against a data set, updating
 * the tree nodes in place.
 *
 * Leaf nodes (nodes with a truthy `category`) receive:
 *   - `trainingSet2`: the rows that reached the leaf,
 *   - `matchedCount` / `notMatchedCount`: rows whose `categoryAttr` value is
 *     / is not strictly equal to the leaf category,
 *   - `quality`: percentage of matching rows as a string with two decimals
 *     ("0.00" when no rows reached the leaf).
 *
 * Inner nodes receive `nodeSet` (the rows that reached the node) and
 * `matchedCount` / `notMatchedCount` (sizes of the two partitions); the
 * partitions are then passed on to `tree.match` and `tree.notMatch`.
 *
 * @param {object} tree - Node of the decision tree; mutated in place.
 * @param {Array<object>} newData - Rows to route through this node.
 * @param {string} categoryAttr - Name of the attribute holding the class label.
 * @returns {void}
 */
export function rebuildTestTree(tree, newData, categoryAttr) {
  // NOTE(review): leaf detection relies on truthiness, so a falsy class label
  // (0, '' or false) would not be recognised as a leaf — confirm that labels
  // are always truthy values.
  if (tree.category) {
    tree.trainingSet2 = newData;

    let positiveCount = 0;
    for (const element of newData) {
      if (element[categoryAttr] === tree.category) positiveCount++;
    }

    const total = newData.length;
    // Guard against 0 / 0: an empty leaf used to end up with quality "NaN".
    const quality = total === 0 ? 0 : (positiveCount / total) * 100;

    tree.quality = quality.toFixed(2);
    tree.matchedCount = positiveCount;
    tree.notMatchedCount = total - positiveCount;
    return;
  }

  tree.nodeSet = newData;

  // A truthy weight selects the weighted comparison; otherwise the predicate
  // is looked up by the name stored on the node.
  const predicate = tree.weight ? predicates['w'] : predicates[tree.predicateName];
  // '==' and '>=' compare an attribute against the constant pivot; the other
  // predicates treat `pivot` as the name of a second attribute of the row.
  const comparesWithConstant = tree.predicateName === '==' || tree.predicateName === '>=';

  const matchedData = [];
  const notMatchedData = [];
  newData.forEach((row) => {
    let match;
    if (comparesWithConstant) {
      match = predicate(row[tree.attr2], tree.pivot);
    } else if (tree.weight) {
      match = predicate(row[tree.attr2], row[tree.pivot], tree.weight);
    } else {
      match = predicate(row[tree.attr2], row[tree.pivot]);
    }
    (match ? matchedData : notMatchedData).push(row);
  });

  tree.matchedCount = matchedData.length;
  tree.notMatchedCount = notMatchedData.length;

  rebuildTestTree(tree.match, matchedData, categoryAttr);
  rebuildTestTree(tree.notMatch, notMatchedData, categoryAttr);
}
/**
 * Lookup table of the comparison functions used when routing rows through a
 * tree node, keyed by predicate name.
 *   '==' : strict equality of both operands
 *   '>=' : left operand is greater than or equal to the right one
 *   '<'  : left operand is smaller than the right one
 *   'w'  : left operand is smaller than the right one scaled by a weight
 */
const predicates = {
  '==': (left, right) => left === right,
  '>=': (left, right) => left >= right,
  '<': (left, right) => left < right,
  w: (left, right, weight) => left < weight * right,
};