From d2b728d248dca6421191a879504cd9f6745098be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hubert=20Soko=C5=82owski?= Date: Sun, 16 May 2021 23:21:24 +0200 Subject: [PATCH] fix tsp and change probability on entropy --- public/algorithms/mix-tree.js | 102 +++++++++---------- public/algorithms/tsp-tree.js | 143 ++++++++++++++++++--------- public/algorithms/tsp-weight-tree.js | 69 +++++++------ src/components/Tree.jsx | 6 +- src/services/playground3.js | 6 +- src/utils/RebuilderTestTree.js | 67 +++++++++++++ 6 files changed, 262 insertions(+), 131 deletions(-) create mode 100644 src/utils/RebuilderTestTree.js diff --git a/public/algorithms/mix-tree.js b/public/algorithms/mix-tree.js index 27b5a68..473a001 100644 --- a/public/algorithms/mix-tree.js +++ b/public/algorithms/mix-tree.js @@ -15,15 +15,9 @@ /** * @param {DecisionTreeBuilder} _builder - * @param {boolean} isChanged */ //TSP -function buildDecisionTreeMix( - _builder, - isChanged = false, - changedAttribute1 = null, - changedAttribute2 = null -) { +function buildDecisionTreeMix(_builder) { //debugger; const builder = { ..._builder }; const { @@ -63,8 +57,7 @@ function buildDecisionTreeMix( // LEAF var initialEntropy = entropy(trainingSet, categoryAttr); - if (initialEntropy <= entropyThrehold && !isChanged) { - console.log('initialEntropy ' + initialEntropy + '<=' + entropyThrehold + ' entropyThrehold'); + if (initialEntropy <= entropyThrehold) { let _category = mostFrequentValue(trainingSet, categoryAttr); let _positiveCounter = 0; for (let element of trainingSet) { @@ -96,10 +89,11 @@ function buildDecisionTreeMix( var lowest; var tmp; - var min = 1000; + var min = 0; for (var alg of arrayOfTests) { tmp = alg.maxDif; - if (tmp < min) { + //console.log(tmp); + if (tmp > min) { lowest = alg; min = tmp; } @@ -220,9 +214,7 @@ context.onmessage = function (event) { }; function TSPDif(allClasses, attributes, trainingSet, categoryAttr) { - var right = 0, - left = 0; - var maxDif = 100; + var maxDif = 0; var direction = '<'; /** @type {string | number} */ var attribute1 = -1; /** @type {string | number} */ var attribute2 = -1; @@ -231,11 +223,14 @@ function TSPDif(allClasses, attributes, trainingSet, categoryAttr) { classMatrix = [new Array(allClasses.length).fill(0), new Array(allClasses.length).fill(0)], match = [], notMatch = []; + var initialEntropy = entropy(trainingSet, categoryAttr); - for (let attr1 of attributes) { - for (let attr2 of attributes) { + let attr1, attr2; + for (let i = 0; i < attributes.length; i++) { + attr1 = attributes[i]; + for (let j = i + 1; j < attributes.length; j++) { + attr2 = attributes[j]; if (attr1 !== attr2) { - right = left = 0; leftList = []; rightList = []; classMatrix = [new Array(allClasses.length).fill(0), new Array(allClasses.length).fill(0)]; @@ -245,33 +240,41 @@ function TSPDif(allClasses, attributes, trainingSet, categoryAttr) { const attribute = element[categoryAttr]; if (element[attr1] < element[attr2]) { - left++; leftList.push(element); classMatrix[0][allClasses.indexOf(attribute)]++; } else { - right++; rightList.push(element); classMatrix[1][allClasses.indexOf(attribute)]++; } } // probability - var probR = 0, - probL = 0, - rankL = 0, - rankR = 0; - for (let k = 0; k < allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; + // var probR = 0, + // probL = 0, + // rankL = 0, + // rankR = 0; + // for (let k = 0; k < allClasses.length; k++) { + // probL = left === 0 ? 0 : classMatrix[0][k] / left; + // probR = right === 0 ? 0 : classMatrix[1][k] / right; - rankL += probL * probL; - rankR += probR * probR; - } + // rankL += probL * probL; + // rankR += probR * probR; + // } - // setting new values - var currentDif = - (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); - if (currentDif < maxDif) { + // // setting new values + // var currentDif = + // (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); + + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; + if (currentDif > maxDif) { maxDif = currentDif; attribute1 = attr1; attribute2 = attr2; @@ -292,7 +295,7 @@ function TSPWDif(allClasses, attributes, trainingSet, categoryAttr) { L_weight = 0, weight = 0, direction = '<'; - var maxDif = 100; + var maxDif = 0; /** @type {string | number} */ var attribute1 = -1; /** @type {string | number} */ var attribute2 = -1; var leftList = [], @@ -300,9 +303,13 @@ function TSPWDif(allClasses, attributes, trainingSet, categoryAttr) { classMatrix = [new Array(allClasses.length).fill(0), new Array(allClasses.length).fill(0)], match = [], notMatch = []; + var initialEntropy = entropy(trainingSet, categoryAttr); - for (let attr1 of attributes) { - for (let attr2 of attributes) { + let attr1, attr2; + for (let i = 0; i < attributes.length; i++) { + attr1 = attributes[i]; + for (let j = i + 1; j < attributes.length; j++) { + attr2 = attributes[j]; if (attr1 !== attr2) { right = left = sum1 = sum2 = weight = 0; leftList = []; @@ -333,22 +340,17 @@ function TSPWDif(allClasses, attributes, trainingSet, categoryAttr) { classMatrix[1][allClasses.indexOf(attribute)]++; } } - var probR = 0, - probL = 0, - rankL = 0, - rankR = 0; - for (let k = 0; k < allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; - rankL += probL * probL; - rankR += probR * probR; - } + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); - var currentDif = - (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); - - if (currentDif < maxDif) { + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; + if (currentDif > maxDif) { maxDif = currentDif; attribute1 = attr1; attribute2 = attr2; diff --git a/public/algorithms/tsp-tree.js b/public/algorithms/tsp-tree.js index 863d2bc..ba84da8 100644 --- a/public/algorithms/tsp-tree.js +++ b/public/algorithms/tsp-tree.js @@ -56,7 +56,7 @@ function buildDecisionTreeTSP( var right = 0, left = 0; - var maxDif = 100, + var maxDif = 0, currentDif; /** @type {string | number} */ var attribute1 = -1; /** @type {string | number} */ var attribute2 = -1; @@ -94,28 +94,44 @@ function buildDecisionTreeTSP( } } - // probability - probR = 0; - probL = 0; - rankL = 0; - rankR = 0; - for (let k = 0; k < builder.allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; + // // probability + // probR = 0; + // probL = 0; + // rankL = 0; + // rankR = 0; + // for (let k = 0; k < builder.allClasses.length; k++) { + // probL = left === 0 ? 0 : classMatrix[0][k] / left; + // probR = right === 0 ? 0 : classMatrix[1][k] / right; - rankL += probL * probL; - rankR += probR * probR; - } + // rankL += probL * probL; + // rankR += probR * probR; + // } - // setting new values - currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + // // setting new values + // currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + + // maxDif = currentDif; + // attribute1 = changedAttribute1; + // attribute2 = changedAttribute2; + // match = leftList; + // notMatch = rightList; + //podzial = classMatrix; + + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); + + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; maxDif = currentDif; attribute1 = changedAttribute1; attribute2 = changedAttribute2; match = leftList; notMatch = rightList; - //podzial = classMatrix; isChanged = false; } else if (isUpdate && !isChanged) { @@ -165,34 +181,52 @@ function buildDecisionTreeTSP( } } - // probability - probR = 0; - probL = 0; - rankL = 0; - rankR = 0; - for (let k = 0; k < builder.allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; + // // probability + // probR = 0; + // probL = 0; + // rankL = 0; + // rankR = 0; + // for (let k = 0; k < builder.allClasses.length; k++) { + // probL = left === 0 ? 0 : classMatrix[0][k] / left; + // probR = right === 0 ? 0 : classMatrix[1][k] / right; - rankL += probL * probL; - rankR += probR * probR; - } + // rankL += probL * probL; + // rankR += probR * probR; + // } - // setting new values - currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + // // setting new values + // currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + + // maxDif = currentDif; + // attribute1 = changedAttribute1; + // attribute2 = changedAttribute2; + // match = leftList; + // notMatch = rightList; + //podzial = classMatrix; + + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); + + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; maxDif = currentDif; - attribute1 = changedAttribute1; - attribute2 = changedAttribute2; + attribute1 = oldTree.attr2; + attribute2 = oldTree.pivot; match = leftList; notMatch = rightList; - //podzial = classMatrix; } } else { let attr1, attr2; for (let i = 0; i < attributes.length; i++) { attr1 = attributes[i]; for (let j = i + 1; j < attributes.length; j++) { + attr2 = attributes[j]; + if (attr1 !== attr2) { right = left = 0; leftList = []; @@ -217,28 +251,45 @@ function buildDecisionTreeTSP( } } - // probability - probR = 0; - probL = 0; - rankL = 0; - rankR = 0; - for (let k = 0; k < builder.allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; + // // probability + // probR = 0; + // probL = 0; + // rankL = 0; + // rankR = 0; + // for (let k = 0; k < builder.allClasses.length; k++) { + // probL = left === 0 ? 0 : classMatrix[0][k] / left; + // probR = right === 0 ? 0 : classMatrix[1][k] / right; - rankL += probL * probL; - rankR += probR * probR; - } + // rankL += probL * probL; + // rankR += probR * probR; + // } - // setting new values - currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); - if (currentDif < maxDif) { + // // setting new values + // currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + // if (currentDif < maxDif) { + // maxDif = currentDif; + // attribute1 = attr1; + // attribute2 = attr2; + // match = leftList; + // notMatch = rightList; + // //podzial = classMatrix; + // } + + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); + + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; + if (currentDif > maxDif) { maxDif = currentDif; attribute1 = attr1; attribute2 = attr2; match = leftList; notMatch = rightList; - //podzial = classMatrix; } } } diff --git a/public/algorithms/tsp-weight-tree.js b/public/algorithms/tsp-weight-tree.js index 3cc602a..4eb524e 100644 --- a/public/algorithms/tsp-weight-tree.js +++ b/public/algorithms/tsp-weight-tree.js @@ -87,7 +87,7 @@ function buildDecisionTreeTSPW( sum1 = 0, sum2 = 0, L_weight = 0; - var maxDif = 100; + var maxDif = 0; /** @type {string | number} */ var attribute1 = -1; /** @type {string | number} */ var attribute2 = -1; var directrion = '<'; @@ -124,22 +124,38 @@ function buildDecisionTreeTSPW( } } - // probability - probR = 0; - probL = 0; - rankL = 0; - rankR = 0; - for (let k = 0; k < builder.allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; + // // probability + // probR = 0; + // probL = 0; + // rankL = 0; + // rankR = 0; + // for (let k = 0; k < builder.allClasses.length; k++) { + // probL = left === 0 ? 0 : classMatrix[0][k] / left; + // probR = right === 0 ? 0 : classMatrix[1][k] / right; - rankL += probL * probL; - rankR += probR * probR; - } + // rankL += probL * probL; + // rankR += probR * probR; + // } - // setting new values - currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + // // setting new values + // currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); + // maxDif = currentDif; + // attribute1 = changedAttribute1; + // attribute2 = changedAttribute2; + // match = leftList; + // notMatch = rightList; + // L_weight = weight; + + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); + + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; maxDif = currentDif; attribute1 = changedAttribute1; attribute2 = changedAttribute2; @@ -191,21 +207,16 @@ function buildDecisionTreeTSPW( } } - probR = 0; - probL = 0; - rankL = 0; - rankR = 0; - for (let k = 0; k < builder.allClasses.length; k++) { - probL = left === 0 ? 0 : classMatrix[0][k] / left; - probR = right === 0 ? 0 : classMatrix[1][k] / right; + let matchEntropy = entropy(rightList, categoryAttr); + let notMatchEntropy = entropy(leftList, categoryAttr); - rankL += probL * probL; - rankR += probR * probR; - } - - currentDif = (right / trainingSet.length) * (1 - rankR) + (left / trainingSet.length) * (1 - rankL); - - if (currentDif < maxDif) { + // calculating informational gain + let newEntropy = 0; + newEntropy += matchEntropy * rightList.length; + newEntropy += notMatchEntropy * leftList.length; + newEntropy /= trainingSet.length; + let currentDif = initialEntropy - newEntropy; + if (currentDif > maxDif) { maxDif = currentDif; attribute1 = attr1; attribute2 = attr2; @@ -259,7 +270,7 @@ function buildDecisionTreeTSPW( trainingSet2: trainingSet, }; } - console.log('-----------Podział-----------'); + //console.log('-----------Podział-----------'); builder.maxTreeDepth = maxTreeDepth - 1; builder.trainingSet = match; var matchSubTree = buildDecisionTreeTSPW(builder); diff --git a/src/components/Tree.jsx b/src/components/Tree.jsx index ed7b17e..e9ba969 100644 --- a/src/components/Tree.jsx +++ b/src/components/Tree.jsx @@ -20,7 +20,7 @@ import { executeAlgorithm } from '../utils/algorithm-executor'; import TestSetFileReader from './TestSetFileReader'; import ConfusionMatrix from './ConfusionMatrix'; import { getSizeTree } from '../utils/size-checker'; -import { testTree } from '../services/playground3'; +import { rebuildTestTree } from '../utils/RebuilderTestTree'; /** * @typedef {import('../utils/decision-tree.js').DecisionTreeBuilder} DecisionTreeBuilder @@ -95,11 +95,11 @@ const Tree = ({ options }) => { console.log(options.categoryAttr); let tmpRoot = JSON.parse(JSON.stringify(newRoot)); if (testSet == null) { - testTree(tmpRoot, options.trainingSet, options.categoryAttr); + rebuildTestTree(tmpRoot, options.trainingSet, options.categoryAttr); console.log(tmpRoot); setSecondRoot(tmpRoot); } else { - testTree(tmpRoot, testSet, options.categoryAttr); + rebuildTestTree(tmpRoot, testSet, options.categoryAttr); console.log(tmpRoot); setSecondRoot(tmpRoot); } diff --git a/src/services/playground3.js b/src/services/playground3.js index 7560662..bb6bc91 100644 --- a/src/services/playground3.js +++ b/src/services/playground3.js @@ -32,13 +32,13 @@ export function testTree(tree, newData, categoryAttr) { let match; if (tree.predicateName === '==' || tree.predicateName === '>=') { match = predicate(x[tree.attr2], tree.pivot); - console.log('c45', match); + //console.log('c45', match); } else if (tree.weight) { match = predicate(x[tree.attr2], x[tree.pivot], tree.weight); - console.log('tspw', match); + //console.log('tspw', match); } else { match = predicate(x[tree.attr2], x[tree.pivot]); - console.log('tsp', match); + //console.log('tsp', match); } match ? matchedData.push(x) : notMatchedData.push(x); diff --git a/src/utils/RebuilderTestTree.js b/src/utils/RebuilderTestTree.js new file mode 100644 index 0000000..1335184 --- /dev/null +++ b/src/utils/RebuilderTestTree.js @@ -0,0 +1,67 @@ +export function rebuildTestTree(tree, newData, categoryAttr) { + let predicate; + + if (tree.category) { + tree.trainingSet2 = newData; + let _positiveCounter = 0, + _quality = 0; + for (let element of newData) { + if (element[categoryAttr] === tree.category) _positiveCounter++; + } + let _negativeCounter = newData.length - _positiveCounter; + _quality = _positiveCounter / newData.length; + _quality = _quality * 100; + + tree.quality = _quality.toFixed(2); + tree.matchedCount = _positiveCounter; + tree.notMatchedCount = _negativeCounter; + return; + } else { + tree.nodeSet = newData; + + if (tree.weight) { + predicate = predicates['w']; + } else { + predicate = predicates[tree.predicateName]; + } + + let matchedData = [], + notMatchedData = []; + + newData.forEach(x => { + let match; + if (tree.predicateName === '==' || tree.predicateName === '>=') { + match = predicate(x[tree.attr2], tree.pivot); + //console.log('c45', match); + } else if (tree.weight) { + match = predicate(x[tree.attr2], x[tree.pivot], tree.weight); + //console.log('tspw', match); + } else { + match = predicate(x[tree.attr2], x[tree.pivot]); + //console.log('tsp', match); + } + + match ? matchedData.push(x) : notMatchedData.push(x); + }); + + tree.matchedCount = matchedData.length; + tree.notMatchedCount = notMatchedData.length; + rebuildTestTree(tree.match, matchedData, categoryAttr); + rebuildTestTree(tree.notMatch, notMatchedData, categoryAttr); + } +} + +var predicates = { + '==': function (a, b) { + return a === b; + }, + '>=': function (a, b) { + return a >= b; + }, + '<': function (a, b) { + return a < b; + }, + w: function (a, b, w) { + return a < w * b; + }, +};