28 #include <QXmlStreamReader>
29 #include <QStringList>
// Shorthand: wraps its argument in QLatin1String. Used in this file to keep
// long chains of Latin-1 string literals (e.g. the HTML tag-name list) compact.
39 #define QL1S(x) QLatin1String(x)
46 const int maxlen = 80;
47 if (str.length() <= maxlen)
50 return str.left(maxlen).append(QLatin1String(
"..."));
111 class KuitSemanticsStaticData
136 KuitSemanticsStaticData ();
139 KuitSemanticsStaticData::KuitSemanticsStaticData ()
144 #define SETUP_TAG(tag, name, atts, subs) do { \
145 knownTags.insert(QString::fromLatin1(name), Kuit::Tag::tag); \
146 tagNames.insert(Kuit::Tag::tag, QString::fromLatin1(name)); \
148 using namespace Kuit::Att; \
149 tagAtts[Kuit::Tag::tag] << atts; \
152 using namespace Kuit::Tag; \
153 tagSubs[Kuit::Tag::tag] << subs << NumIntg << NumReal; \
159 Filename << Link << Application << Command << Resource << Icode << \
160 Shortcut << Interface << Emphasis << Placeholder << Email << \
197 #define SETUP_ATT(att, name) do { \
198 knownAtts.insert(QString::fromLatin1(name), Kuit::Att::att); \
212 #define SETUP_FMT(fmt, name) do { \
213 knownFmts.insert(QString::fromLatin1(name), Kuit::Fmt::fmt); \
221 #define SETUP_ROL(rol, name, fmt, cues) do { \
222 knownRols.insert(QString::fromLatin1(name), Kuit::Rol::rol); \
223 defFmts[Kuit::Rol::rol][Kuit::Cue::None] = Kuit::Fmt::fmt; \
225 using namespace Kuit::Cue; \
226 rolCues[Kuit::Rol::rol] << cues; \
244 #undef SETUP_ROLCUEFMT
245 #define SETUP_ROLCUEFMT(rol, cue, fmt) do { \
246 defFmts[Kuit::Rol::rol][Kuit::Cue::cue] = Kuit::Fmt::fmt; \
255 #define SETUP_CUE(cue, name) do { \
256 knownCues.insert(QString::fromLatin1(name), Kuit::Cue::cue); \
287 qtHtmlTagNames <<
QL1S(
"a") <<
QL1S(
"address") <<
QL1S(
"b") <<
QL1S(
"big") <<
QL1S(
"blockquote")
300 #define SETUP_TAG_NL(tag, nlead) do { \
301 leadingNewlines.insert(Kuit::Tag::tag, nlead); \
// Forward table: entity name (without the surrounding '&' and ';') mapped to
// a one-character QString holding the character it stands for.
// Covers exactly five names: lt, gt, amp, apos, quot.
311 xmlEntities[QString::fromLatin1(
"lt")] =
QString(QLatin1Char(
'<'));
312 xmlEntities[QString::fromLatin1(
"gt")] =
QString(QLatin1Char(
'>'));
313 xmlEntities[QString::fromLatin1(
"amp")] =
QString(QLatin1Char(
'&'));
314 xmlEntities[QString::fromLatin1(
"apos")] =
QString(QLatin1Char(
'\''));
315 xmlEntities[QString::fromLatin1(
"quot")] =
QString(QLatin1Char(
'"'));
// Inverse table: the same five pairs keyed the other way round
// (one-character string -> entity name), so a character can be mapped back
// to the name to emit between '&' and ';'.
316 xmlEntitiesInverse[
QString(QLatin1Char(
'<'))] = QString::fromLatin1(
"lt");
317 xmlEntitiesInverse[
QString(QLatin1Char(
'>'))] = QString::fromLatin1(
"gt");
318 xmlEntitiesInverse[
QString(QLatin1Char(
'&'))] = QString::fromLatin1(
"amp");
319 xmlEntitiesInverse[
QString(QLatin1Char(
'\''))] = QString::fromLatin1(
"apos");
320 xmlEntitiesInverse[
QString(QLatin1Char(
'"'))] = QString::fromLatin1(
"quot");
329 class KuitSemanticsPrivate
333 KuitSemanticsPrivate (
const QString &lang_);
// Translates a message through the meta catalog (m_metaCat).
//   ctxt - message context passed to the catalog lookup
//   id   - message text to look up
// Returns m_metaCat->translate(ctxt, id); when m_metaCat is NULL the
// untranslated id is returned instead, converted with QString::fromLatin1.
338 QString metaTr (
const char *ctxt,
const char *
id)
const;
// Fills m_patterns (indexed by tag, attribute-set key and format) with
// patterns obtained through metaTr(). Called from the constructor after the
// meta catalog has been created.
341 void setFormattingPatterns ();
// Fills text-transformation lookup data; the visible part populates
// m_keyNames, mapping a trimmed, lower-cased raw key name to its metaTr()
// translation. Called from the constructor after setFormattingPatterns().
344 void setTextTransformData ();
366 bool hadQtTag =
false,
367 bool hadAnyHtmlTag =
false)
const;
// How an opened element is treated when its text is formatted:
//   Proper  - a known semantic tag; its content is run through a visual
//             pattern (see formatSubText).
//   Ignored - not a semantic tag; it is written back out as literal markup
//             (<name attrs>...</name>, or <name/> for br/hr).
//   Dropout - the tag itself is not emitted and only its already formatted
//             content is returned; used for qt/html wrappers and for a
//             semantic tag reported as not allowed as a subtag of its parent.
377 typedef enum { Proper, Ignored, Dropout } Handling;
389 KuitSemanticsPrivate::OpenEl parseOpenEl (
const QXmlStreamReader &xml,
// Examines the run of '\n' characters at the start and at the end of ptext.
//   ptext - text to inspect
//   numle - out: used as the index that scans forward over leading newlines
//   numtr - out: used as the offset that scans backward over trailing newlines
// NOTE(review): presumably both end up holding the respective newline counts;
// the increment statements are not visible here - confirm against the body.
401 static void countWrappingNewlines (
const QString &ptext,
402 int &numle,
int &numtr);
426 KuitSemanticsPrivate::KuitSemanticsPrivate (
const QString &lang)
437 m_metaCat =
new KCatalog(QString::fromLatin1(
"kdelibs4"), lang);
440 setFormattingPatterns();
443 setTextTransformData();
450 QString KuitSemanticsPrivate::metaTr (
const char *ctxt,
const char *
id)
const
452 if (m_metaCat == NULL) {
453 return QString::fromLatin1(
id);
455 return m_metaCat->translate(ctxt,
id);
458 void KuitSemanticsPrivate::setFormattingPatterns ()
460 using namespace Kuit;
464 #define SET_PATTERN(tag, atts, fmt, ctxt_ptrn) do { \
467 int akey = attSetKey(aset); \
468 QString pattern = metaTr(ctxt_ptrn); \
469 m_patterns[tag][akey][fmt] = pattern; \
471 if (fmt == Fmt::Plain && !m_patterns[tag][akey].contains(Fmt::Term)) { \
472 m_patterns[tag][akey][Fmt::Term] = pattern; \
// Expands to its two arguments unchanged, separated by a comma. A single
// macro argument written as I18N_NOOP2(ctxt, msg) therefore becomes two call
// arguments once it is substituted into a call such as metaTr(ctxt_ptrn)
// inside SET_PATTERN.
478 #define I18N_NOOP2(ctxt, msg) ctxt, msg
// Same expansion as I18N_NOOP2 above (both arguments, comma separated) under
// a different macro name.
// NOTE(review): the reason for the alternate name is not visible in this
// part of the file - confirm before relying on it.
483 #define XXXX_NOOP2(ctxt, msg) ctxt, msg
558 "%1 is the note label, %2 is the text",
563 "%1 is the note label, %2 is the text",
575 "<b>Warning</b>: %1"));
578 "%1 is the warning label, %2 is the text",
583 "%1 is the warning label, %2 is the text",
595 "<a href=\"%1\">%1</a>"));
598 "%1 is the URL, %2 is the descriptive text",
603 "%1 is the URL, %2 is the descriptive text",
605 "<a href=\"%1\">%2</a>"));
638 "%1 is the command name, %2 is its man section",
643 "%1 is the command name, %2 is its man section",
723 "<<i>%1</i>>"));
733 "<<a href=\"mailto:%1\">%1</a>>"));
736 "%1 is name, %2 is address",
741 "%1 is name, %2 is address",
743 "<a href=\"mailto:%2\">%1</a>"));
776 void KuitSemanticsPrivate::setTextTransformData ()
// Within setTextTransformData the marker macro is defined to call metaTr
// directly, so I18N_NOOP2(ctxt, msg) evaluates to the translated string.
780 #define I18N_NOOP2(ctxt, msg) metaTr(ctxt, msg)
801 #define SET_KEYNAME(rawname) do { \
803 QString normname = QString::fromLatin1(rawname).trimmed().toLower(); \
804 m_keyNames[normname] = metaTr("keyboard-key-name", rawname); \
// Here the marker macro discards the context and expands to the message only.
// NOTE(review): presumably for use as the SET_KEYNAME argument, which passes
// its own fixed context ("keyboard-key-name") to metaTr - confirm at the
// call sites.
809 #define I18N_NOOP2(ctxt, msg) msg
859 Kuit::FmtVar fmtExplicit = formatFromContextMarker(ctxt, text);
862 if (text.indexOf(QLatin1Char(
'<')) < 0) {
863 return finalizeVisualText(text, fmtExplicit);
870 fmtImplicit = formatFromTags(text);
876 QString wtext = equipTopTag(text, toptag);
879 QString ftext = semanticToVisualText(wtext, fmtExplicit, fmtImplicit);
880 if (ftext.isEmpty()) {
881 return salvageMarkup(text, fmtImplicit);
900 Kuit::FmtVar KuitSemanticsPrivate::formatFromContextMarker (
907 KuitSemanticsStaticData *s = semanticsStaticData;
914 QString ctxmark = ctxmark_.trimmed();
915 if (ctxmark.startsWith(QLatin1Char(
'@'))) {
916 static QRegExp wsRx(QString::fromLatin1(
"\\s"));
917 ctxmark = ctxmark.mid(1, wsRx.indexIn(ctxmark) - 1);
920 int pfmt = ctxmark.indexOf(QLatin1Char(
'/'));
922 fmtname = ctxmark.mid(pfmt + 1);
923 ctxmark = ctxmark.left(pfmt);
927 int pcue = ctxmark.indexOf(QLatin1Char(
':'));
929 cuename = ctxmark.mid(pcue + 1);
930 ctxmark = ctxmark.left(pcue);
939 rolname = rolname.trimmed().toLower();
940 cuename = cuename.trimmed().toLower();
941 fmtname = fmtname.trimmed().toLower();
945 if (s->knownRols.contains(rolname)) {
946 rol = s->knownRols[rolname];
950 if (!rolname.isEmpty()) {
951 kDebug(173) << QString::fromLatin1(
"Unknown semantic role '@%1' in "
952 "context marker for message {%2}.")
959 if (s->knownCues.contains(cuename)) {
960 cue = s->knownCues[cuename];
964 if (!cuename.isEmpty()) {
965 kDebug(173) << QString::fromLatin1(
"Unknown interface subcue ':%1' in "
966 "context marker for message {%2}.")
973 if (s->knownFmts.contains(fmtname)) {
974 fmt = s->knownFmts[fmtname];
980 if (s->defFmts.contains(rol)) {
981 if (s->defFmts[rol].contains(cue)) {
982 fmt = s->defFmts[rol][cue];
992 if (!fmtname.isEmpty()) {
993 kDebug(173) << QString::fromLatin1(
"Unknown visual format '/%1' in "
994 "context marker for message {%2}.")
1004 KuitSemanticsStaticData *s = semanticsStaticData;
1005 static QRegExp staticTagRx(QString::fromLatin1(
"<\\s*(\\w+)[^>]*>"));
1007 QRegExp tagRx = staticTagRx;
1008 int p = tagRx.indexIn(text);
1010 QString tagname = tagRx.capturedTexts().at(1).toLower();
1011 if (s->qtHtmlTagNames.contains(tagname)) {
1014 p = tagRx.indexIn(text, p + tagRx.matchedLength());
1022 KuitSemanticsStaticData *s = semanticsStaticData;
1027 static QRegExp opensWithTagRx(QString::fromLatin1(
"^\\s*<\\s*(\\w+)[^>]*>"));
1028 bool explicitTopTag =
false;
1031 int p = opensWithTagRx.indexIn(text);
1035 QString fullmatch = opensWithTagRx.capturedTexts().at(0);
1036 QString tagname = opensWithTagRx.capturedTexts().at(1).toLower();
1037 if (tagname == QLatin1String(
"qt") || tagname == QLatin1String(
"html")) {
1040 text = text.mid(fullmatch.length());
1041 p = opensWithTagRx.indexIn(text);
1047 QString tagname = opensWithTagRx.capturedTexts().at(1).toLower();
1048 if (s->knownTags.contains(tagname)) {
1053 explicitTopTag =
true;
1073 if (!explicitTopTag) {
1074 return QLatin1Char(
'<') + s->tagNames[toptag] + QLatin1Char(
'>')
1076 + QLatin1String(
"</") + s->tagNames[toptag] + QLatin1Char(
'>');
// Regular-expression fragment for the part of an entity reference between
// '&' and ';': a lowercase name ([a-z]+), a decimal character reference
// (#[0-9]+) or a hexadecimal one (#x[0-9a-fA-F]+).
// It is a bare alternation with no enclosing group; users splice it between
// their own parentheses through adjacent string-literal concatenation.
1083 #define ENTITY_SUBRX "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+"
1085 QString KuitSemanticsPrivate::semanticToVisualText (
const QString &text_,
1089 KuitSemanticsStaticData *s = semanticsStaticData;
1095 int p = original.indexOf(QLatin1Char(
'&'));
1097 text.append(original.mid(0, p + 1));
1098 original.remove(0, p + 1);
1099 static QRegExp restRx(QString::fromLatin1(
"^("ENTITY_SUBRX");"));
1100 if (original.indexOf(restRx) != 0) {
1101 text.append(QLatin1String(
"amp;"));
1103 p = original.indexOf(QLatin1Char(
'&'));
1105 text.append(original);
1110 bool hadQtTag =
false;
1111 bool hadAnyHtmlTag =
false;
1112 QStack<OpenEl> openEls;
1113 QXmlStreamReader xml(text);
1114 QStringRef lastElementName;
1116 while (!xml.atEnd()) {
1119 if (xml.isStartElement()) {
1120 lastElementName = xml.name();
1124 for (
int i = openEls.size() - 1; i >= 0; --i) {
1125 if (openEls[i].handling == OpenEl::Proper) {
1126 etag = openEls[i].tag;
1132 OpenEl oel = parseOpenEl(xml, etag, text);
1133 if (oel.name == QLatin1String(
"qt") || oel.name == QLatin1String(
"html")) {
1136 if (s->qtHtmlTagNames.contains(oel.name)) {
1137 hadAnyHtmlTag =
true;
1144 fmtExp = formatFromContextMarker(oel.avals[
Kuit::Att::Ctx], text);
1156 else if (xml.isEndElement()) {
1158 OpenEl oel = openEls.pop();
1161 if (openEls.isEmpty()) {
1163 return finalizeVisualText(oel.formattedText, fmtExp,
1164 hadQtTag, hadAnyHtmlTag);
1168 QString pt = openEls.top().formattedText;
1169 openEls.top().formattedText += formatSubText(pt, oel, fmtImp, numCtx);
1176 else if (xml.isCharacters()) {
1180 QString text = xml.text().toString();
1182 foreach (
const QChar &c, text) {
1183 if (s->xmlEntitiesInverse.contains(c)) {
1184 const QString entname = s->xmlEntitiesInverse[c];
1185 ntext += QLatin1Char(
'&') + entname + QLatin1Char(
';');
1190 openEls.top().formattedText += ntext;
1194 if (xml.hasError()) {
1195 kDebug(173) << QString::fromLatin1(
"Markup error in message {%1}: %2. Last tag parsed: %3")
1196 .arg(
shorten(text), xml.errorString(), lastElementName.toString());
1204 KuitSemanticsPrivate::OpenEl
1205 KuitSemanticsPrivate::parseOpenEl (
const QXmlStreamReader &xml,
1213 KuitSemanticsStaticData *s = semanticsStaticData;
1216 oel.name = xml.name().toString().toLower();
1220 foreach (
const QXmlStreamAttribute &xatt, xml.attributes()) {
1221 attnams += xatt.name().toString().toLower();
1222 attvals += xatt.value().toString();
1223 QChar qc = attvals.last().indexOf(QLatin1Char(
'\'')) < 0 ? QLatin1Char(
'\'') : QLatin1Char(
'"');
1224 oel.astr += QLatin1Char(
' ') + attnams.last() + QLatin1Char(
'=') + qc + attvals.last() + qc;
1227 if (s->knownTags.contains(oel.name)) {
1228 oel.tag = s->knownTags[oel.name];
1233 oel.handling = OpenEl::Proper;
1236 oel.handling = OpenEl::Dropout;
1237 kDebug(173) << QString::fromLatin1(
"Tag '%1' cannot be subtag of '%2' "
1239 .arg(s->tagNames[oel.tag], s->tagNames[etag],
1245 for (
int i = 0; i < attnams.size(); ++i) {
1246 if (s->knownAtts.contains(attnams[i])) {
1248 if (s->tagAtts[oel.tag].contains(att)) {
1250 oel.avals[att] = attvals[i];
1253 kDebug(173) << QString::fromLatin1(
"Attribute '%1' cannot be used in "
1254 "tag '%2' in message {%3}.")
1255 .arg(attnams[i], oel.name,
1260 kDebug(173) << QString::fromLatin1(
"Unknown semantic tag attribute '%1' "
1262 .arg(attnams[i],
shorten(text));
1265 oel.akey = attSetKey(attset);
1267 else if (oel.name == QLatin1String(
"qt") || oel.name == QLatin1String(
"html")) {
1269 oel.handling = OpenEl::Dropout;
1272 oel.handling = OpenEl::Ignored;
1273 if (!s->qtHtmlTagNames.contains(oel.name)) {
1274 kDebug(173) << QString::fromLatin1(
"Tag '%1' is neither semantic nor HTML in "
1276 .arg(oel.name,
shorten(text));
1287 QString pattern = QString::fromLatin1(
"%1");
1290 if ( m_patterns.contains(tag)
1291 && m_patterns[tag].contains(akey)
1292 && m_patterns[tag][akey].contains(fmt))
1294 pattern = m_patterns[tag][akey][fmt];
1300 QString KuitSemanticsPrivate::formatSubText (
const QString &ptext,
1305 KuitSemanticsStaticData *s = semanticsStaticData;
1307 if (oel.handling == OpenEl::Proper) {
1309 QString pattern = visualPattern(oel.tag, oel.akey, fmt);
1312 QString mtext = modifyTagText(oel.formattedText, oel.tag, oel.avals,
1315 using namespace Kuit;
1320 ftext = pattern.arg(oel.avals[
Att::Url], mtext);
1329 ftext = pattern.arg(oel.avals[
Att::Label], mtext);
1332 ftext = pattern.arg(oel.avals[
Att::Label], mtext);
1335 ftext = pattern.arg(mtext);
1340 if (!ptext.isEmpty() && s->leadingNewlines.contains(oel.tag)) {
1342 int pnumle, pnumtr, fnumle, fnumtr;
1343 countWrappingNewlines(ptext, pnumle, pnumtr);
1344 countWrappingNewlines(ftext, fnumle, fnumtr);
1346 int numle = pnumtr + fnumle;
1349 if (numle < s->leadingNewlines[oel.tag]) {
1350 strle =
QString(s->leadingNewlines[oel.tag] - numle, QLatin1Char(
'\n'));
1352 ftext = strle + ftext;
1357 else if (oel.handling == OpenEl::Ignored) {
1358 if (oel.name == QLatin1String(
"br") || oel.name == QLatin1String(
"hr")) {
1360 return QLatin1Char(
'<') + oel.name + QLatin1String(
"/>");
1363 return QLatin1Char(
'<') + oel.name + oel.astr + QLatin1Char(
'>')
1365 + QLatin1String(
"</") + oel.name + QLatin1Char(
'>');
1369 return oel.formattedText;
1373 void KuitSemanticsPrivate::countWrappingNewlines (
const QString &text,
1374 int &numle,
int &numtr)
1376 int len = text.length();
1379 while (numle < len && text[numle] == QLatin1Char(
'\n')) {
1384 while (numtr < len && text[len - numtr - 1] == QLatin1Char(
'\n')) {
1389 QString KuitSemanticsPrivate::modifyTagText (
const QString &text,
1401 const QChar fillChar = !fillStr.isEmpty() ? fillStr[0] : QChar::fromLatin1(
' ');
1402 return QString::fromLatin1(
"%1").arg(
KGlobal::locale()->formatNumber(text,
false),
1403 fieldWidth, fillChar);
1406 return QDir::toNativeSeparators(text);
1419 QString KuitSemanticsPrivate::finalizeVisualText (
const QString &
final,
1422 bool hadAnyHtmlTag)
const
1424 KuitSemanticsStaticData *s = semanticsStaticData;
1432 static QRegExp staticEntRx(QLatin1String(
"&("ENTITY_SUBRX");"));
1435 QRegExp entRx = staticEntRx;
1436 int p = entRx.indexIn(text);
1439 QString ent = entRx.capturedTexts().at(1);
1440 plain.append(text.mid(0, p));
1441 text.remove(0, p + ent.length() + 2);
1442 if (ent.startsWith(QLatin1Char(
'#'))) {
1445 if (ent[1] == QLatin1Char(
'x')) {
1446 c = QChar(ent.mid(2).toInt(&ok, 16));
1448 c = QChar(ent.mid(1).toInt(&ok, 10));
1453 plain.append(QLatin1Char(
'&') + ent + QLatin1Char(
';'));
1456 else if (s->xmlEntities.contains(ent)) {
1457 plain.append(s->xmlEntities[ent]);
1459 plain.append(QLatin1Char(
'&') + ent + QLatin1Char(
';'));
1461 p = entRx.indexIn(text);
1469 text = QString::fromLatin1(
"<html>") + text + QLatin1String(
"</html>");
1475 QString KuitSemanticsPrivate::salvageMarkup (
const QString &text_,
1478 KuitSemanticsStaticData *s = semanticsStaticData;
1486 static QRegExp staticWrapRx(QLatin1String(
"(<\\s*(\\w+)\\b([^>]*)>)(.*)(<\\s*/\\s*\\2\\s*>)"));
1487 QRegExp wrapRx = staticWrapRx;
1488 wrapRx.setMinimal(
true);
1492 int previousPos = pos;
1493 pos = wrapRx.indexIn(text, previousPos);
1495 ntext += text.mid(previousPos);
1498 ntext += text.mid(previousPos, pos - previousPos);
1500 QString tagname = capts[2].toLower();
1501 QString content = salvageMarkup(capts[4], fmt);
1502 if (s->knownTags.contains(tagname)) {
1505 QString pattern = visualPattern(s->knownTags[tagname], 0, fmt);
1506 ntext += pattern.arg(content);
1508 ntext += capts[1] + content + capts[5];
1510 pos += wrapRx.matchedLength();
1515 static QRegExp staticNowrRx(QLatin1String(
"<\\s*(\\w+)\\b([^>]*)/\\s*>"));
1516 QRegExp nowrRx = staticNowrRx;
1517 nowrRx.setMinimal(
true);
1521 int previousPos = pos;
1522 pos = nowrRx.indexIn(text, previousPos);
1524 ntext += text.mid(previousPos);
1527 ntext += text.mid(previousPos, pos - previousPos);
1529 QString tagname = capts[1].toLower();
1530 if (s->knownTags.contains(tagname)) {
1531 QString pattern = visualPattern(s->knownTags[tagname], 0, fmt);
1532 ntext += pattern.arg(
QString());
1536 pos += nowrRx.matchedLength();
1547 : d(new KuitSemanticsPrivate(lang))
1558 return d->format(text, ctxt);
1563 KuitSemanticsStaticData *s = semanticsStaticData;
1566 int p1 = text.indexOf(QLatin1Char(
'&'));
1569 int p2 = text.indexOf(QLatin1Char(
';'), p1);
1570 return (p2 > p1 && s->xmlEntities.contains(text.mid(p1, p2 - p1)));
1574 int tlen = text.length();
1575 p1 = text.indexOf(QLatin1Char(
'<'));
1580 bool closing =
false;
1581 while (p1 < tlen && (text[p1].isSpace() || text[p1] == QLatin1Char(
'/'))) {
1582 if (text[p1] == QLatin1Char(
'/')) {
1591 for (
int p2 = p1; p2 < tlen; ++p2) {
1593 if (c == QLatin1Char(
'>') || (!closing && c == QLatin1Char(
'/')) || c.isSpace()) {
1594 return s->qtHtmlTagNames.contains(text.mid(p1, p2 - p1));
1595 }
else if (!c.isLetter()) {
1607 int tlen = text.length();
1609 ntext.reserve(tlen);
1610 for (
int i = 0; i < tlen; ++i) {
1612 if (c == QLatin1Char(
'&')) {
1613 ntext += QLatin1String(
"&");
1614 }
else if (c == QLatin1Char(
'<')) {
1615 ntext += QLatin1String(
"<");
1616 }
else if (c == QLatin1Char(
'>')) {
1617 ntext += QLatin1String(
">");
1618 }
else if (c == QLatin1Char(
'\'')) {
1619 ntext += QLatin1String(
"'");
1620 }
else if (c == QLatin1Char(
'"')) {
1621 ntext += QLatin1String(
""");