mirror of
https://github.com/KDE/konsole.git
synced 2026-05-08 06:35:07 -04:00
Add a tool for generating character width tables
Summary: The uni2characterwidth tool converts Unicode Character Database files into character width lookup tables. It uses a template file to place the tables in a source code file together with a function for finding the width of a specified character. It can also generate several forms of lists with width data for debug and test purposes, or for future use as a replacement of Unicode files. Set the `KONSOLE_BUILD_UNI2CHARACTERWIDTH` cmake flag to build the tool. Use the `--help` argument for more detailed usage. It is possible to generate a separate "width" for Ambiguous characters. It can be used to add the ability to configure the width of these characters in Konsole settings. The `template.example` file contains all possible named tags, and some additional tags to show how to use them. CCBUG: 396435 Depends on D15756 Test Plan: Download the files listed below from the `11.0.0` and `emoji/11.0` directories on `https://unicode.org/Public/`. You can also directly use URLs to the files. * UnicodeData.txt * EastAsianWidth.txt * emoji-data.txt Generate any available list except compact-ranges (e.g. `details`): ``` uni2characterwidth \ -U UnicodeData.txt -A EastAsianWidth.txt -E emoji-data.txt \ -g details result.txt ``` The list should contain ranges for all possible widths (-2, -1, 0, 1, 2). You can choose some characters with a width you know and check how they were classified. -2 is a special non-standard width for ambiguous characters, which can be overridden by adding the `-a 1` or `-a 2` parameter. With this flag, all ranges from the -2 group should disappear and become assigned to the selected width (1 or 2). Generate output using a template: ``` uni2characterwidth \ -U UnicodeData.txt -A EastAsianWidth.txt -E emoji-data.txt \ -g code,./template.example result.txt ``` Reviewers: #konsole, hindenburg Reviewed By: #konsole, hindenburg Subscribers: hindenburg, konsole-devel Tags: #konsole Differential Revision: https://phabricator.kde.org/D15757
This commit is contained in:
committed by
Kurt Hindenburg
parent
0f33ee504b
commit
5f32cb3c44
@@ -33,9 +33,10 @@ endif()
|
||||
### Security concerns about sendText and runCommand dbus methods being public
|
||||
option(REMOVE_SENDTEXT_RUNCOMMAND_DBUS_METHODS "Konsole: remove sendText and runCommand dbus methods" OFF)
|
||||
|
||||
### Font Embedder and LineFont.h
|
||||
### Development tools
|
||||
option(KONSOLE_BUILD_FONTEMBEDDER "Konsole: build fontembedder executable" OFF)
|
||||
option(KONSOLE_GENERATE_LINEFONT "Konsole: regenerate LineFont file" OFF)
|
||||
option(KONSOLE_BUILD_UNI2CHARACTERWIDTH "Konsole: build uni2characterwidth executable" OFF)
|
||||
|
||||
### Konsole source files shared between embedded terminal and main application
|
||||
# qdbuscpp2xml -m Session.h -o org.kde.konsole.Session.xml
|
||||
|
||||
@@ -17,3 +17,4 @@ if(KONSOLE_BUILD_FONTEMBEDDER OR KONSOLE_GENERATE_LINEFONT)
|
||||
target_link_libraries(fontembedder Qt5::Core)
|
||||
endif()
|
||||
|
||||
add_subdirectory( uni2characterwidth )
|
||||
|
||||
30
tools/uni2characterwidth/CMakeLists.txt
Normal file
30
tools/uni2characterwidth/CMakeLists.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
### uni2characterwidth
|
||||
###
|
||||
### Converts Unicode Character Database files into character width lookup
|
||||
### tables. Uses a template file to place the tables in a source code file
|
||||
### together with a function for finding the width for specified character.
|
||||
###
|
||||
### See `uni2characterwidth --help` for usage information
|
||||
# The tool is optional: everything below is skipped unless the
# KONSOLE_BUILD_UNI2CHARACTERWIDTH option is switched on.
if(KONSOLE_BUILD_UNI2CHARACTERWIDTH)

    # Packages providing the libraries the executable links against.
    find_package(Qt5 ${QT_MIN_VERSION} CONFIG REQUIRED Core)
    find_package(KF5 ${KF5_MIN_VERSION} REQUIRED KIO)

    # Sources (and headers) that make up the executable.
    set(uni2characterwidth_SRC
        uni2characterwidth.cpp
        properties.h
        template.cpp
        template.h
    )

    add_executable(uni2characterwidth ${uni2characterwidth_SRC})
    target_link_libraries(uni2characterwidth Qt5::Core KF5::KIOCore)

endif()
|
||||
78
tools/uni2characterwidth/properties.h
Normal file
78
tools/uni2characterwidth/properties.h
Normal file
@@ -0,0 +1,78 @@
|
||||
// X-macro tables of Unicode property values.
//
// Every entry has the form MACRO(val, sym, intVal). Each macro defaults to
// expanding to nothing when the including file has not defined it, and is
// #undef'ed again right after its table.

/**************************************/
// General category values (single bits) and groups (OR of the member bits).

#ifndef CATEGORY_PROPERTY_VALUE
#define CATEGORY_PROPERTY_VALUE(val, sym, intVal)
#endif
#ifndef CATEGORY_PROPERTY_GROUP
#define CATEGORY_PROPERTY_GROUP(val, sym, intVal)
#endif

// Letters
CATEGORY_PROPERTY_VALUE(Lu, UppercaseLetter,        1<<0)   // an uppercase letter
CATEGORY_PROPERTY_VALUE(Ll, LowercaseLetter,        1<<1)   // a lowercase letter
CATEGORY_PROPERTY_VALUE(Lt, TitlecaseLetter,        1<<2)   // a digraphic character, with first part uppercase
CATEGORY_PROPERTY_GROUP(LC, CasedLetter,            1<<0|1<<1|1<<2)
CATEGORY_PROPERTY_VALUE(Lm, ModifierLetter,         1<<3)   // a modifier letter
CATEGORY_PROPERTY_VALUE(Lo, OtherLetter,            1<<4)   // other letters, including syllables and ideographs
CATEGORY_PROPERTY_GROUP(L,  Letter,                 1<<0|1<<1|1<<2|1<<3|1<<4)

// Marks
CATEGORY_PROPERTY_VALUE(Mn, NonspacingMark,         1<<5)   // a nonspacing combining mark (zero advance width)
CATEGORY_PROPERTY_VALUE(Mc, SpacingMark,            1<<6)   // a spacing combining mark (positive advance width)
CATEGORY_PROPERTY_VALUE(Me, EnclosingMark,          1<<7)   // an enclosing combining mark
CATEGORY_PROPERTY_GROUP(M,  Mark,                   1<<5|1<<6|1<<7)

// Numbers
CATEGORY_PROPERTY_VALUE(Nd, DecimalNumber,          1<<8)   // a decimal digit
CATEGORY_PROPERTY_VALUE(Nl, LetterNumber,           1<<9)   // a letterlike numeric character
CATEGORY_PROPERTY_VALUE(No, OtherNumber,            1<<10)  // a numeric character of other type
CATEGORY_PROPERTY_GROUP(N,  Number,                 1<<8|1<<9|1<<10)

// Punctuation
CATEGORY_PROPERTY_VALUE(Pc, ConnectorPunctuation,   1<<11)  // a connecting punctuation mark, like a tie
CATEGORY_PROPERTY_VALUE(Pd, DashPunctuation,        1<<12)  // a dash or hyphen punctuation mark
CATEGORY_PROPERTY_VALUE(Ps, OpenPunctuation,        1<<13)  // an opening punctuation mark (of a pair)
CATEGORY_PROPERTY_VALUE(Pe, ClosePunctuation,       1<<14)  // a closing punctuation mark (of a pair)
CATEGORY_PROPERTY_VALUE(Pi, InitialPunctuation,     1<<15)  // an initial quotation mark
CATEGORY_PROPERTY_VALUE(Pf, FinalPunctuation,       1<<16)  // a final quotation mark
CATEGORY_PROPERTY_VALUE(Po, OtherPunctuation,       1<<17)  // a punctuation mark of other type
CATEGORY_PROPERTY_GROUP(P,  Punctuation,            1<<11|1<<12|1<<13|1<<14|1<<15|1<<16|1<<17)

// Symbols
CATEGORY_PROPERTY_VALUE(Sm, MathSymbol,             1<<18)  // a symbol of mathematical use
CATEGORY_PROPERTY_VALUE(Sc, CurrencySymbol,         1<<19)  // a currency sign
CATEGORY_PROPERTY_VALUE(Sk, ModifierSymbol,         1<<20)  // a non-letterlike modifier symbol
CATEGORY_PROPERTY_VALUE(So, OtherSymbol,            1<<21)  // a symbol of other type
CATEGORY_PROPERTY_GROUP(S,  Symbol,                 1<<18|1<<19|1<<20|1<<21)

// Separators
CATEGORY_PROPERTY_VALUE(Zs, SpaceSeparator,         1<<22)  // a space character (of various non-zero widths)
CATEGORY_PROPERTY_VALUE(Zl, LineSeparator,          1<<23)  // U+2028 LINE SEPARATOR only
CATEGORY_PROPERTY_VALUE(Zp, ParagraphSeparator,     1<<24)  // U+2029 PARAGRAPH SEPARATOR only
CATEGORY_PROPERTY_GROUP(Z,  Separator,              1<<22|1<<23|1<<24)

// Other
CATEGORY_PROPERTY_VALUE(Cc, Control,                1<<25)  // a C0 or C1 control code
CATEGORY_PROPERTY_VALUE(Cf, Format,                 1<<26)  // a format control character
CATEGORY_PROPERTY_VALUE(Cs, Surrogate,              1<<27)  // a surrogate code point
CATEGORY_PROPERTY_VALUE(Co, PrivateUse,             1<<28)  // a private-use character
CATEGORY_PROPERTY_VALUE(Cn, Unassigned,             1<<29)  // a reserved unassigned code point or a noncharacter
CATEGORY_PROPERTY_GROUP(C,  Other,                  1<<25|1<<26|1<<27|1<<28|1<<29)

#undef CATEGORY_PROPERTY_VALUE
#undef CATEGORY_PROPERTY_GROUP

/**************************************/
// East Asian Width values, one bit each.

#ifndef EAST_ASIAN_WIDTH_PROPERTY_VALUE
#define EAST_ASIAN_WIDTH_PROPERTY_VALUE(val, sym, intVal)
#endif

EAST_ASIAN_WIDTH_PROPERTY_VALUE(A,  Ambiguous, 1<<0)
EAST_ASIAN_WIDTH_PROPERTY_VALUE(F,  Fullwidth, 1<<1)
EAST_ASIAN_WIDTH_PROPERTY_VALUE(H,  Halfwidth, 1<<2)
EAST_ASIAN_WIDTH_PROPERTY_VALUE(N,  Neutral,   1<<3)
EAST_ASIAN_WIDTH_PROPERTY_VALUE(Na, Narrow,    1<<4)
EAST_ASIAN_WIDTH_PROPERTY_VALUE(W,  Wide,      1<<5)

#undef EAST_ASIAN_WIDTH_PROPERTY_VALUE

/**************************************/
// Emoji property values; the first entry has an empty `val` and value 0.

#ifndef EMOJI_PROPERTY_VALUE
#define EMOJI_PROPERTY_VALUE(val, sym, intVal)
#endif

EMOJI_PROPERTY_VALUE(,                    None,               0)
EMOJI_PROPERTY_VALUE(Emoji,               Emoji,              1<<0)
EMOJI_PROPERTY_VALUE(Emoji_Presentation,  EmojiPresentation,  1<<1)
EMOJI_PROPERTY_VALUE(Emoji_Modifier,      EmojiModifier,      1<<2)
// NOTE(review): this symbol keeps an underscore unlike its siblings; left
// unchanged because users of the symbol are outside this file.
EMOJI_PROPERTY_VALUE(Emoji_Modifier_Base, EmojiModifier_Base, 1<<3)
EMOJI_PROPERTY_VALUE(Emoji_Component,     EmojiComponent,     1<<4)

#undef EMOJI_PROPERTY_VALUE
|
||||
404
tools/uni2characterwidth/template.cpp
Normal file
404
tools/uni2characterwidth/template.cpp
Normal file
@@ -0,0 +1,404 @@
|
||||
/*
|
||||
This file is part of Konsole, a terminal emulator for KDE.
|
||||
|
||||
Copyright 2018 by Mariusz Glebocki <mglb@arccos-1.net>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <QDebug>
|
||||
#include <QMap>
|
||||
#include <QString>
|
||||
#include <QVector>
|
||||
#include <QRegularExpression>
|
||||
#include <QRegularExpressionMatch>
|
||||
#include "template.h"
|
||||
|
||||
// Returns a copy of `str` with backslash escapes resolved: a backslash that
// is followed by another character is dropped and that character is copied
// verbatim. A backslash at the very end of the input is copied as is.
static const QString unescape(const QStringRef &str) {
    const int length = str.length();
    QString unescaped;
    unescaped.reserve(length);

    int pos = 0;
    while(pos < length) {
        const bool startsEscape = str.at(pos) == QLatin1Char('\\') && pos + 1 < length;
        if(startsEscape) {
            // Skip the backslash; the escaped character is appended below.
            ++pos;
        }
        unescaped += str.at(pos);
        ++pos;
    }
    return unescaped;
}
|
||||
|
||||
//
|
||||
// Template::Element
|
||||
//
|
||||
// Looks for a printf-like format usable with `type`, starting at this element
// and walking up through its ancestors. The first non-empty format accepted
// by isValidFmt() wins; when none is found the default for `type` is returned.
const QString Template::Element::findFmt(Var::DataType type) const {
    const Template::Element *node = this;
    while(node != nullptr) {
        const bool usable = !node->fmt.isEmpty() && isValidFmt(node->fmt, type);
        if(usable) {
            return node->fmt;
        }
        node = node->parent;
    }
    return defaultFmt(type);
}
|
||||
|
||||
// Builds a dot-separated path from the top-most ancestor down to this
// element. Elements that report a name (and the top-most element) contribute
// their `name`; other elements contribute "[N]", where N is their index in
// the parent's children, or "[anon]" when they are not found there.
QString Template::Element::path() const {
    QStringList segments;
    for(const Template::Element *node = this; node != nullptr; node = node->parent) {
        const Template::Element *owner = node->parent;
        if(node->hasName() || owner == nullptr) {
            segments.prepend(node->name);
            continue;
        }

        // Identify the element by its position among the siblings.
        QString label = QStringLiteral("[anon]");
        const int siblingCount = owner->children.size();
        for(int idx = 0; idx < siblingCount; ++idx) {
            if(&owner->children[idx] == node) {
                label = QStringLiteral("[%1]").arg(idx);
                break;
            }
        }
        segments.prepend(label);
    }
    return segments.join(QLatin1Char('.'));
}
|
||||
|
||||
// Returns the format used when no explicit one applies: "%d" for numbers and
// "%s" for strings. Any other type is not expected here.
const QString Template::Element::defaultFmt(Var::DataType type) {
    if(type == Var::DataType::Number) {
        return QStringLiteral("%d");
    }
    if(type == Var::DataType::String) {
        return QStringLiteral("%s");
    }
    Q_UNREACHABLE();
}
|
||||
|
||||
bool Template::Element::isValidFmt(const QString &fmt, Var::DataType type) {
|
||||
switch(type) {
|
||||
case Var::DataType::String: return fmt.endsWith(QLatin1Char('s'));
|
||||
case Var::DataType::Number: return true; // regexp in parser takes care of it
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Template
|
||||
//
|
||||
|
||||
// Stores a copy of the template text and sets up the root element so that it
// spans the whole text, has no parent and starts at line 1, column 1.
Template::Template(const QString &text): _text(text) {
    const QStringRef wholeText(&_text);

    _root.parent = nullptr;
    _root.name = QStringLiteral("[root]");
    _root.line = 1;
    _root.column = 1;
    _root.outer = wholeText;
    _root.inner = wholeText;
}
|
||||
|
||||
void Template::parse() {
|
||||
_root.children.clear();
|
||||
_root.outer = QStringRef(&_text);
|
||||
_root.inner = QStringRef(&_text);
|
||||
parseRecursively(_root);
|
||||
// dbgDumpTree(_root);
|
||||
}
|
||||
|
||||
// Produces the output text by filling the parsed element tree with `data`.
QString Template::generate(const Var &data) {
    QString output;
    // Start with room for the size of the template text.
    output.reserve(_text.size());
    generateRecursively(output, _root, data);
    return output;
}
|
||||
|
||||
// Prints a warning with the line, column and path of `element`. When `id` is
// not empty, the path of a temporary child of `element` named `id` is used
// instead of the path of `element` itself.
static inline void warn(const Template::Element &element, const QString &id, const QString &msg) {
    QString location;
    if(id.isEmpty()) {
        location = element.path();
    } else {
        location = Template::Element(&element, id).path();
    }

    const QString text = QStringLiteral("Warning: %1:%2: %3: %4").arg(element.line).arg(element.column).arg(location, msg);
    qWarning() << text;
}

// Convenience overload: warns about `element` itself.
static inline void warn(const Template::Element &element, const QString &msg) {
    warn(element, QString(), msg);
}
|
||||
|
||||
// Executes a template command (`«!name args:content»`).
//
// element   - element in which the command tag was found; the command
//             element is appended to its children.
// childStub - pre-filled element describing the command tag.
// argv      - command name followed by its arguments.
//
// Supported commands:
//   repeat N - parses the content once and replicates the parsed children so
//              that they occur N times. An invalid N (missing, not a number,
//              or lower than 1) produces a warning and the command element
//              is not added at all.
//   fmt F    - sets the printf-like format F for numbers and strings inside
//              the content. An invalid F produces a warning and leaves the
//              format empty.
// Unknown commands are ignored.
void Template::executeCommand(Element &element, const Template::Element &childStub, const QStringList &argv) {
    if(argv.isEmpty()) {
        return;
    }
    const QString &command = argv.at(0);
    // value() yields an empty string when the argument is missing, so the
    // warnings below never index past the end of argv.
    const QString argument = argv.value(1);

    // Insert content N times
    if(command == QStringLiteral("repeat")) {
        bool ok = false;
        // Kept signed: a negative count must be rejected instead of wrapping
        // around to a huge unsigned value.
        const int count = argument.toInt(&ok);
        if(!ok || count < 1) {
            warn(element, QStringLiteral("!") + command, QStringLiteral("invalid repeat count (%1), assuming 0.").arg(argument));
            return;
        }

        element.children.append(childStub);
        Template::Element &cmdElement = element.children.last();
        if(!cmdElement.inner.isEmpty()) {
            // Parse children
            parseRecursively(cmdElement);
            // Remember how many children were there before replication
            const int originalChildrenCount = cmdElement.children.size();
            // Replicate children; the first occurrence is already in place
            for(int i = 1; i < count; ++i) {
                for(int chId = 0; chId < originalChildrenCount; ++chId) {
                    cmdElement.children.append(cmdElement.children[chId]);
                }
            }
        }
    // Set printf-like format (with leading %) applied for strings and numbers
    // inside the group
    } else if(command == QStringLiteral("fmt")) {
        static const QRegularExpression FMT_RE(QStringLiteral(R":(^%[-0 +#]?(?:[1-9][0-9]*)?\.?[0-9]*[diouxXs]$):"),
                                               QRegularExpression::OptimizeOnFirstUsageOption);
        const auto match = FMT_RE.match(argument);
        QString fmt = QStringLiteral("");
        if(!match.hasMatch()) {
            warn(element, QStringLiteral("!") + command, QStringLiteral("invalid format (%1), assuming default").arg(argument));
        } else {
            fmt = match.captured();
        }

        element.children.append(childStub);
        Template::Element &cmdElement = element.children.last();
        cmdElement.fmt = fmt;
        parseRecursively(cmdElement);
    }
}
|
||||
|
||||
// Finds all tags directly inside `element.inner` and appends an element for
// each of them to `element.children`:
//   - command tags (`«!cmd args:content»`) are split into arguments and
//     handed to executeCommand(), which decides what gets appended,
//   - comment tags (`«*id: ... :id*»`) become childless elements flagged
//     with isComment,
//   - any other tag becomes a (possibly unnamed) element whose content is
//     parsed recursively.
// Line and column of every tag are tracked for use in warnings.
void Template::parseRecursively(Element &element) {
    static const QRegularExpression RE(QStringLiteral(R":((?'comment'«\*(([^:]*):)?.*?(?(-2):\g{-1})\*»)|):"
                                                      R":(«(?:(?'name'[-_a-zA-Z0-9]*)|(?:!(?'cmd'[-_a-zA-Z0-9]+(?: +(?:[^\\:]+|(?:\\.)+)+)?)))):"
                                                      R":((?::(?:~[ \t]*\n)?(?'inner'(?:[^«]*?|(?R))*))?(?:\n[ \t]*~)?»):"),
                                       QRegularExpression::DotMatchesEverythingOption | QRegularExpression::MultilineOption |
                                       QRegularExpression::OptimizeOnFirstUsageOption);
    static const QRegularExpression CMD_SPLIT_RE(QStringLiteral(R":((?:"((?:(?:\\.)*|[^"]*)*)"|(?:[^\\ "]+|(?:\\.)+)+)):"),
                                                 QRegularExpression::DotMatchesEverythingOption | QRegularExpression::MultilineOption |
                                                 QRegularExpression::OptimizeOnFirstUsageOption);
    static const QRegularExpression UNESCAPE_RE(QStringLiteral(R":(\\(.)):"),
                                                QRegularExpression::DotMatchesEverythingOption | QRegularExpression::MultilineOption |
                                                QRegularExpression::OptimizeOnFirstUsageOption);
    static const QString nameGroupName = QStringLiteral("name");
    static const QString innerGroupName = QStringLiteral("inner");
    static const QString cmdGroupName = QStringLiteral("cmd");
    static const QString commentGroupName = QStringLiteral("comment");

    // Position tracking starts where the element itself starts.
    int posOffset = element.outer.position();
    uint posLine = element.line;
    uint posColumn = element.column;

    auto matchIter = RE.globalMatch(element.inner);
    while(matchIter.hasNext()) {
        auto match = matchIter.next();
        auto cmd = match.captured(cmdGroupName);
        auto comment = match.captured(commentGroupName);

        const auto localOuterRef = match.capturedRef(0);
        const auto localInnerRef = match.capturedRef(innerGroupName);

        // Re-create the references so that they point into _text.
        auto outerRef = QStringRef(&_text, localOuterRef.position(), localOuterRef.length());
        auto innerRef = QStringRef(&_text, localInnerRef.position(), localInnerRef.length());

        // Advance line/column counters up to the beginning of this tag.
        while(posOffset < outerRef.position() && posOffset < _text.size()) {
            if(_text[posOffset++] == QLatin1Char('\n')) {
                ++posLine;
                posColumn = 1;
            } else {
                ++posColumn;
            }
        }

        if(!cmd.isEmpty()) {
            // Split the command into arguments; for a quoted argument with
            // non-empty content the quotes are dropped.
            QStringList cmdArgv;
            auto cmdArgIter = CMD_SPLIT_RE.globalMatch(cmd);
            while(cmdArgIter.hasNext()) {
                auto cmdArg = cmdArgIter.next();
                cmdArgv += cmdArg.captured(cmdArg.captured(1).isEmpty() ? 0 : 1);
                // Replace every "\x" with "x". The backslash has to be
                // doubled in the C++ literal: "\1" would be the control
                // character U+0001 rather than a reference to group 1.
                cmdArgv.last().replace(UNESCAPE_RE, QStringLiteral("\\1"));
            }

            Template::Element childStub = Template::Element(&element);
            childStub.outer = outerRef;
            childStub.name = QLatin1Char('!') + cmd;
            childStub.inner = innerRef;
            childStub.line = posLine;
            childStub.column = posColumn;
            executeCommand(element, childStub, cmdArgv);
        } else if(!comment.isEmpty()) {
            element.children.append(Element(&element));
            Template::Element &child = element.children.last();
            child.outer = outerRef;
            child.name = QString();
            child.inner = QStringRef();
            child.line = posLine;
            child.column = posColumn;
            child.isComment = true;
        } else {
            element.children.append(Element(&element));
            Template::Element &child = element.children.last();
            child.outer = outerRef;
            child.name = match.captured(nameGroupName);
            child.inner = innerRef;
            child.line = posLine;
            child.column = posColumn;
            if(!child.inner.isEmpty()) {
                parseRecursively(child);
            }
        }
    }
}
|
||||
|
||||
// Appends the text generated for `element` (filled with `data`) to `result`.
//
// result   - output text, appended to.
// element  - element to generate; either a container (has children) or a
//            leaf.
// data     - value for the element: a number or string, a list whose items
//            are distributed among the children, or a map whose values are
//            looked up by child name.
// consumed - number of data items already consumed before this call.
//
// Returns the number of consumed data items. A container is generated
// repeatedly until all data items are consumed; a command element is
// generated only once.
int Template::generateRecursively(QString &result, const Template::Element &element, const Var &data, int consumed) {
    int consumedDataItems = consumed;

    if(!element.children.isEmpty()) {
        int totalDataItems;
        switch(data.dataType()) {
        case Var::DataType::Number:
        case Var::DataType::String:
        case Var::DataType::Map:
            totalDataItems = 1;
            break;
        case Var::DataType::Vector:
            totalDataItems = data.vec.size();
            break;
        case Var::DataType::Invalid:
        default:
            Q_UNREACHABLE();
        }

        while(consumedDataItems < totalDataItems) {
            const int consumedBeforePass = consumedDataItems;
            int prevChildEndPosition = element.inner.position();
            for(const auto &child: element.children) {
                const int characterCountBetweenChildren = child.outer.position() - prevChildEndPosition;
                if(characterCountBetweenChildren > 0) {
                    // Add text between previous child (or inner beginning) and this child.
                    result += unescape(_text.midRef(prevChildEndPosition, characterCountBetweenChildren));
                } else if(characterCountBetweenChildren < 0) {
                    // Repeated item; they overlap and end1 > start2
                    result += unescape(element.inner.mid(prevChildEndPosition - element.inner.position()));
                    result += unescape(element.inner.left(child.outer.position() - element.inner.position()));
                }

                switch(data.dataType()) {
                case Var::DataType::Number:
                case Var::DataType::String:
                    generateRecursively(result, child, data);
                    consumedDataItems = 1; // Deepest child always consumes number/string
                    break;
                case Var::DataType::Vector:
                    if(!data.vec.isEmpty()) {
                        if(!child.hasName() && !child.isCommand() && consumedDataItems < data.vec.size()) {
                            // Plain unnamed child: gets the next list item.
                            consumedDataItems += generateRecursively(result, child, data[consumedDataItems]);
                        } else {
                            // Other children get the rest of the list.
                            consumedDataItems += generateRecursively(result, child, data.vec.mid(consumedDataItems));
                        }
                    } else {
                        warn(child, QStringLiteral("no more items available in parent's list."));
                    }
                    break;
                case Var::DataType::Map:
                    if(!child.hasName()) {
                        consumedDataItems = generateRecursively(result, child, data);
                    } else if(data.map.contains(child.name)) {
                        generateRecursively(result, child, data.map[child.name]);
                        // Always consume, repeating doesn't change anything
                        consumedDataItems = 1;
                    } else {
                        warn(child, QStringLiteral("missing value for the element in parent's map."));
                    }
                    break;
                default:
                    break;
                }
                prevChildEndPosition = child.outer.position() + child.outer.length();
            }

            // Text after the last child.
            result += unescape(element.inner.mid(prevChildEndPosition - element.inner.position(), -1));

            if(element.isCommand()) {
                break;
            }

            // A pass which consumed nothing would be repeated identically
            // forever (e.g. a value missing in the map, or a trailing
            // comment resetting the counter), so stop here.
            if(consumedDataItems <= consumedBeforePass) {
                break;
            }

            const bool isLast = consumedDataItems >= totalDataItems;
            if(!isLast) {
                // Collapse empty lines between elements
                int nlNum = 0;
                for(int i = 0; i < element.inner.size() / 2; ++i) {
                    if(element.inner.at(i) == QLatin1Char('\n') &&
                            element.inner.at(i) == element.inner.at(element.inner.size() - i - 1)) {
                        nlNum++;
                    } else {
                        break;
                    }
                }
                if(nlNum > 0) {
                    result.chop(nlNum);
                }
            }
        }
    } else if(!element.isComment) {
        // Handle leaf element.
        // Numbers are passed as int: the formats accepted by the parser have
        // no length modifier, so the conversion expects an int-sized value.
        switch(data.dataType()) {
        case Var::DataType::Number: {
            const QString fmt = element.findFmt(Var::DataType::Number);
            result += QString::asprintf(qUtf8Printable(fmt), static_cast<int>(data.num));
            break;
        }
        case Var::DataType::String: {
            const QString fmt = element.findFmt(Var::DataType::String);
            result += QString::asprintf(qUtf8Printable(fmt), qUtf8Printable(data.str));
            break;
        }
        case Var::DataType::Vector:
            if(data.vec.isEmpty()) {
                warn(element, QStringLiteral("got empty list."));
            } else {
                // The value to print is the first list item; `data` itself
                // holds a list, so its num/str members must not be read.
                const Var &first = data.vec.at(0);
                if(first.dataType() == Var::DataType::Number) {
                    const QString fmt = element.findFmt(Var::DataType::Number);
                    result += QString::asprintf(qUtf8Printable(fmt), static_cast<int>(first.num));
                } else if(first.dataType() == Var::DataType::String) {
                    const QString fmt = element.findFmt(Var::DataType::String);
                    result += QString::asprintf(qUtf8Printable(fmt), qUtf8Printable(first.str));
                } else {
                    warn(element, QStringLiteral("the list entry data type (%1) is not supported in childrenless elements.").
                         arg(first.dataTypeAsString()));
                }
            }
            break;
        case Var::DataType::Map:
            warn(element, QStringLiteral("map type is not supported in childrenless elements."));
            break;
        case Var::DataType::Invalid:
            break;
        }
        consumedDataItems = 1;
    }

    return consumedDataItems;
}
|
||||
|
||||
/*
|
||||
void dbgDumpTree(const Template::Element &element) {
|
||||
static int indent = 0;
|
||||
QString type;
|
||||
if(element.isCommand())
|
||||
type = QStringLiteral("command");
|
||||
else if(element.isComment)
|
||||
type = QStringLiteral("comment");
|
||||
else if(element.hasName() && element.inner.isEmpty())
|
||||
type = QStringLiteral("empty named");
|
||||
else if(element.hasName())
|
||||
type = QStringLiteral("named");
|
||||
else if(element.inner.isEmpty())
|
||||
type = QStringLiteral("empty anonymous");
|
||||
else
|
||||
type = QStringLiteral("anonymous");
|
||||
|
||||
qDebug().noquote() << QStringLiteral("%1[%2] \"%3\" %4:%5")
|
||||
.arg(QStringLiteral("· ").repeated(indent), type, element.name)
|
||||
.arg(element.line)
|
||||
.arg(element.column);
|
||||
indent++;
|
||||
for(const auto &child: element.children) {
|
||||
dbgDumpTree(child);
|
||||
}
|
||||
indent--;
|
||||
}
|
||||
*/
|
||||
77
tools/uni2characterwidth/template.example
Normal file
77
tools/uni2characterwidth/template.example
Normal file
@@ -0,0 +1,77 @@
|
||||
«*COMMENT:----------------------------------------------------------------------
|
||||
|
||||
Tags:
|
||||
|
||||
«*anything:comment where everything but closing sequence is allowed:anything*»
|
||||
|
||||
«NAME:any content, including other tags. \: have to be escaped. It is processed
|
||||
using data passed from code() function under NAME key. It should contain other
|
||||
tags, without them this text will be replaced with passed data or removed.»
|
||||
|
||||
«NAME» - like before, used when data should replace it, so content is
|
||||
unnecessary
|
||||
|
||||
EXAMPLE:
|
||||
data: Map{ "exampleA", Map{ { "Number", 42 }, { "String", "hello" } } }
|
||||
template: «exampleA:number\: «Number», string\: «String»»
|
||||
result: number: 42, string: hello
|
||||
|
||||
«» - empty anonymous element. Used in named elements which receive lists.
|
||||
The element will be replaced with a list item, and duplicated if the list contains more items.
|
||||
|
||||
«:anonymous container. It should contain some elements which receive data.
|
||||
The element will disappear when child element will not receive any value.
|
||||
Useful to add suffixes/prefixes to data»
|
||||
|
||||
EXAMPLE:
|
||||
data: Map{ "exampleB", Vector{ 1, 2, 3, 4, 5, 6, 7 } }
|
||||
template: «exampleB:«:[«»] »»
|
||||
result: [1] [2] [3] [4] [5] [6] [7]
|
||||
|
||||
data: Map{ "exampleC", Vector{ "a", "b", "c" } }
|
||||
template: «exampleC:«:first = «»»«:, second = «»»«:, third = «»»«:, fourth = «»»»
|
||||
result: first = a, second = b, third = c
|
||||
|
||||
«!fmt "XXX":a wrapper which sets printf-like format XXX for numbers and
|
||||
strings inside it. Starts with %.»
|
||||
|
||||
«!repeat N:repeats contents inside N times.»
|
||||
|
||||
EXAMPLE:
|
||||
data: Map{ "exampleD", Vector{ 1, 2, 3, 4, 10, 11, 12, 13 } }
|
||||
template: «exampleD:«!fmt "%#.2x":«!repeat 3:«» »«»; »»
|
||||
result: 0x01 0x02 0x03 0x04; 0x0a 0x0b 0x0c 0x0d;
|
||||
|
||||
D: «exampleD:«!fmt "%#.2x":«!repeat 3:«» »«»; »»
|
||||
----------------------------------------------------------------------:COMMENT*»
|
||||
For available data see code() function. Below are usage examples
|
||||
|
||||
Warning about generated file - putting "this is a generated file" text in a
|
||||
template file could be misleading.
|
||||
«gen-file-warning»
|
||||
|
||||
|
||||
Command used to generate the file:
|
||||
«cmdline»
|
||||
|
||||
|
||||
Direct LUT - widths of the first 256 code points in direct access array:
|
||||
{«!fmt "% d":«direct-lut:
|
||||
«!repeat 32:«:«»,»»
|
||||
»»}
|
||||
|
||||
|
||||
Arrays with code point ranges for every width:
|
||||
«ranges-luts:«:
|
||||
«name» = {«!fmt "%#.6x":«ranges:
|
||||
«!repeat 8:«:{«first»,«last»},»»
|
||||
»»}
|
||||
Number of elements in the array: «size»
|
||||
|
||||
»»
|
||||
List of array names, sizes, and widths:
|
||||
{«ranges-lut-list:
|
||||
«:{«!fmt "% d":«width»», «!fmt "%-16s":«name»», «size»},»
|
||||
»}
|
||||
Number of elements in the array: «ranges-lut-list-size»;
|
||||
|
||||
184
tools/uni2characterwidth/template.h
Normal file
184
tools/uni2characterwidth/template.h
Normal file
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
This file is part of Konsole, a terminal emulator for KDE.
|
||||
|
||||
Copyright 2018 by Mariusz Glebocki <mglb@arccos-1.net>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef TEMPLATE_H
|
||||
#define TEMPLATE_H
|
||||
|
||||
#include <QMap>
|
||||
#include <QString>
|
||||
#include <QVector>
|
||||
|
||||
// Backward compatibility
|
||||
// Fallback used with Qt older than 5.7 when no qAsConst macro exists: the
// argument is passed through unchanged. It is parenthesized so that an
// argument containing operators still expands to a single expression.
#if QT_VERSION < QT_VERSION_CHECK(5, 7, 0) && !defined(qAsConst)
#define qAsConst(code) (code)
#endif
|
||||
|
||||
// QVariant doesn't offer modification in place. Var does.
|
||||
class Var {
|
||||
public:
|
||||
using Number = qint64;
|
||||
using String = QString;
|
||||
using Map = QMap<String, Var>;
|
||||
using Vector = QVector<Var>;
|
||||
|
||||
enum class DataType {
|
||||
Invalid,
|
||||
Number,
|
||||
String,
|
||||
Vector,
|
||||
Map,
|
||||
};
|
||||
|
||||
const QString dataTypeAsString() const {
|
||||
switch(dataType()) {
|
||||
case DataType::Invalid: return QStringLiteral("Invalid");
|
||||
case DataType::Number: return QStringLiteral("Number");
|
||||
case DataType::String: return QStringLiteral("String");
|
||||
case DataType::Vector: return QStringLiteral("Vector");
|
||||
case DataType::Map: return QStringLiteral("Map");
|
||||
default: return QStringLiteral("Unknown?");
|
||||
}
|
||||
}
|
||||
|
||||
Var(): num(0), _dataType(DataType::Invalid) {}
|
||||
Var(const Var &other) { *this = other; }
|
||||
|
||||
Var(const Number &newNum): _dataType(DataType::Number) { new(&num) auto(newNum); }
|
||||
Var(const String &newStr): _dataType(DataType::String) { new(&str) auto(newStr); }
|
||||
Var(const Vector &newVec): _dataType(DataType::Vector) { new(&vec) auto(newVec); }
|
||||
Var(const Map &newMap): _dataType(DataType::Map) { new(&map) auto(newMap); }
|
||||
|
||||
// Allow initialization without type name
|
||||
Var(const char * newStr): _dataType(DataType::String) { new(&str) String(QString::fromUtf8(newStr)); }
|
||||
Var(std::initializer_list<Var> newVec): _dataType(DataType::Vector) { new(&vec) Vector(newVec); }
|
||||
|
||||
~Var() {
|
||||
switch(dataType()) {
|
||||
case DataType::String: str.~String(); break;
|
||||
case DataType::Vector: vec.~Vector(); break;
|
||||
case DataType::Map: map.~Map(); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
Var & operator=(const Var &other) {
|
||||
_dataType = other.dataType();
|
||||
switch(other.dataType()) {
|
||||
case DataType::Number: new(&num) auto(other.num); break;
|
||||
case DataType::String: new(&str) auto(other.str); break;
|
||||
case DataType::Vector: new(&vec) auto(other.vec); break;
|
||||
case DataType::Map: new(&map) auto(other.map); break;
|
||||
default: break;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Var & operator[](unsigned index) {
|
||||
Q_ASSERT(_dataType == DataType::Vector);
|
||||
return vec.data()[index];
|
||||
}
|
||||
const Var & operator[](unsigned index) const {
|
||||
Q_ASSERT(_dataType == DataType::Vector);
|
||||
return vec.constData()[index];
|
||||
}
|
||||
Var & operator[](const String &key) {
|
||||
Q_ASSERT(_dataType == DataType::Map);
|
||||
return map[key];
|
||||
}
|
||||
const Var & operator[](const String &key) const {
|
||||
Q_ASSERT(_dataType == DataType::Map);
|
||||
return *map.find(key);
|
||||
}
|
||||
|
||||
DataType dataType() const { return _dataType; }
|
||||
|
||||
union {
|
||||
Number num;
|
||||
String str;
|
||||
Vector vec;
|
||||
Map map;
|
||||
};
|
||||
|
||||
private:
|
||||
DataType _dataType;
|
||||
};
|
||||
|
||||
class Template {
|
||||
public:
|
||||
Template(const QString &text);
|
||||
void parse();
|
||||
QString generate(const Var &data);
|
||||
|
||||
struct Element {
|
||||
Element(const Element *parent = nullptr, const QString &name = QString())
|
||||
: outer()
|
||||
, inner()
|
||||
, name(name)
|
||||
, fmt()
|
||||
, line(0)
|
||||
, column(0)
|
||||
, isComment(false)
|
||||
, children()
|
||||
, parent(parent) {}
|
||||
|
||||
Element(const Element &other)
|
||||
: outer(other.outer)
|
||||
, inner(other.inner)
|
||||
, name(other.name)
|
||||
, fmt(other.fmt)
|
||||
, line(other.line)
|
||||
, column(other.column)
|
||||
, isComment(other.isComment)
|
||||
, parent(other.parent) {
|
||||
for(const auto &child: other.children) {
|
||||
children.append(child);
|
||||
}
|
||||
}
|
||||
|
||||
const QString findFmt(Var::DataType type) const;
|
||||
QString path() const;
|
||||
bool isCommand() const { return name.startsWith(QLatin1Char('!')); }
|
||||
bool hasName() const { return !isCommand() && !name.isEmpty(); }
|
||||
|
||||
static const QString defaultFmt(Var::DataType type);
|
||||
static bool isValidFmt(const QString &fmt, Var::DataType type);
|
||||
|
||||
QStringRef outer;
|
||||
QStringRef inner;
|
||||
QString name;
|
||||
QString fmt;
|
||||
uint line;
|
||||
uint column;
|
||||
bool isComment;
|
||||
QList<Element> children;
|
||||
const Element *parent;
|
||||
};
|
||||
private:
|
||||
|
||||
void executeCommand(Element &element, const Element &childStub, const QStringList &argv);
|
||||
void parseRecursively(Element &element);
|
||||
int generateRecursively(QString &result, const Element &element, const Var &data, int consumed = 0);
|
||||
|
||||
QString _text; // FIXME: make it pointer (?)
|
||||
Element _root; // FIXME: make it pointer
|
||||
};
|
||||
|
||||
#endif
|
||||
1011
tools/uni2characterwidth/uni2characterwidth.cpp
Normal file
1011
tools/uni2characterwidth/uni2characterwidth.cpp
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user