--- /dev/null
+/****************************************************************************
+**
+** Copyright (C) 2018 Intel Corporation
+**
+** Permission is hereby granted, free of charge, to any person obtaining a copy
+** of this software and associated documentation files (the "Software"), to deal
+** in the Software without restriction, including without limitation the rights
+** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+** copies of the Software, and to permit persons to whom the Software is
+** furnished to do so, subject to the following conditions:
+**
+** The above copyright notice and this permission notice shall be included in
+** all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+** THE SOFTWARE.
+**
+****************************************************************************/
+
+#define _BSD_SOURCE 1
+#define _DEFAULT_SOURCE 1
+#ifndef __STDC_LIMIT_MACROS
+# define __STDC_LIMIT_MACROS 1
+#endif
+
+#include "cbor.h"
+#include "cborinternal_p.h"
+#include "compilersupport_p.h"
+#include "utf8_p.h"
+
+#include <inttypes.h>
+#include <string.h>
+
+/**
+ * \defgroup CborPretty Converting CBOR to text
+ * \brief Group of functions used to convert CBOR to text form.
+ *
+ * This group contains two functions that can be used to convert a \ref
+ * CborValue object to a text representation. This module attempts to follow
+ * the recommendations from RFC 7049 section 6 "Diagnostic Notation", though it
+ * has a few differences. They are noted below.
+ *
+ * TinyCBOR does not provide a way to convert from the text representation back
+ * to encoded form. To produce a text form meant to be parsed, CborToJson is
+ * recommended instead.
+ *
+ * Either of the functions in this section will attempt to convert exactly one
+ * CborValue object to text. Those functions may return any error documented
+ * for the functions for CborParsing. In addition, if the C standard library
+ * stream functions return with error, the text conversion will return with
+ * error CborErrorIO.
+ *
+ * These functions also perform UTF-8 validation in CBOR text strings. If they
+ * encounter a sequence of bytes that is not permitted in UTF-8, they will return
+ * CborErrorInvalidUtf8TextString. That includes encoding of surrogate points
+ * in UTF-8.
+ *
+ * \warning The output type produced by these functions is not guaranteed to
+ * remain stable. A future update of TinyCBOR may produce different output for
+ * the same input and parsers may be unable to handle it.
+ *
+ * \sa CborParsing, CborToJson, cbor_parser_init()
+ */
+
+/**
+ * \addtogroup CborPretty
+ * @{
+ * <h2 class="groupheader">Text format</h2>
+ *
+ * As described in RFC 7049 section 6 "Diagnostic Notation", the format is
+ * largely borrowed from JSON, but modified to suit CBOR's different data
+ * types. TinyCBOR makes further modifications to distinguish different, but
+ * similar values.
+ *
+ * CBOR values are currently encoded as follows:
+ * \par Integrals (unsigned and negative)
+ * Base-10 (decimal) text representation of the value
+ * \par Byte strings:
+ * <tt>"h'"</tt> followed by the Base16 (hex) representation of the binary data, followed by an ending quote (')
+ * \par Text strings:
+ * C-style escaped string in quotes, with C11/C++11 escaping of Unicode codepoints above U+007F.
+ * \par Tags:
+ * Tag value, with the tagged value in parentheses. No special encoding of the tagged value is performed.
+ * \par Simple types:
+ * <tt>"simple(nn)"</tt> where \c nn is the simple value
+ * \par Null:
+ * \c null
+ * \par Undefined:
+ * \c undefined
+ * \par Booleans:
+ * \c true or \c false
+ * \par Floating point:
+ * If NaN or infinite, the text that the C library's printf-style \c g conversion produces for such values (for example \c nan or \c inf).
+ * Otherwise, the decimal representation with as many digits as necessary to ensure no loss of information.
+ * By default, float values are suffixed by "f" and half-float values suffixed by "f16" (doubles have no suffix).
+ * If the CborPrettyNumericEncodingIndicators flag is active, the values instead are encoded following the
+ * Section 6 recommended encoding indicators: float values are suffixed with "_2" and half-float with "_1".
+ * A decimal point is always present.
+ * \par Arrays:
+ * Comma-separated list of elements, enclosed in square brackets ("[" and "]").
+ * \par Maps:
+ * Comma-separated list of key-value pairs, with the key and value separated
+ * by a colon (":"), enclosed in curly braces ("{" and "}").
+ *
+ * The CborPrettyFlags enumerator contains flags to control some aspects of the
+ * encoding:
+ * \par String fragmentation
+ * When the CborPrettyShowStringFragments option is active, text and byte
+ * strings that are transmitted in fragments are shown instead inside
+ * parentheses ("(" and ")") with no preceding number and each fragment is
+ * displayed individually. If a tag precedes the string, then the output
+ * will contain a double set of parentheses. If the option is not active,
+ * the fragments are merged together and the display will not show any
+ * difference from a string transmitted with determinate length.
+ * \par Encoding indicators
+ * Numbers and lengths in CBOR can be encoded in multiple representations.
+ * If the CborPrettyIndicateOverlongNumbers option is active, numbers
+ * and lengths that are transmitted in a longer encoding than necessary
+ * will be indicated, by appending an underscore ("_") to either the
+ * number or the opening bracket or brace, followed by a number
+ * indicating the CBOR additional information: 0 for 1 byte, 1 for 2
+ * bytes, 2 for 4 bytes and 3 for 8 bytes.
+ * If the CborPrettyIndicateIndeterminateLength option is active, maps,
+ * arrays and strings encoded with indeterminate length will be marked by
+ * an underscore after the opening bracket or brace or the string (if not
+ * showing fragments), without a number after it.
+ */
+
+/**
+ * \enum CborPrettyFlags
+ * The CborPrettyFlags enum contains flags that control the conversion of CBOR to text format.
+ *
+ * \value CborPrettyNumericEncodingIndicators Use numeric encoding indicators instead of textual for float and half-float.
+ * \value CborPrettyTextualEncodingIndicators Use textual encoding indicators for float ("f") and half-float ("f16").
+ * \value CborPrettyIndicateIndeterminateLength (default) Indicate when a map, array or string has indeterminate length.
+ * \value CborPrettyIndicateOverlongNumbers Indicate when a number or length was encoded with more bytes than needed.
+ * \value CborPrettyShowStringFragments If the byte or text string is transmitted in chunks, show each individually.
+ * \value CborPrettyMergeStringFragment Merge all chunked byte or text strings and display them in a single entry.
+ * \value CborPrettyDefaultFlags Default conversion flags.
+ */
+
+#ifndef CBOR_NO_FLOATING_POINT
+/* Tries to express the magnitude of \a v as a 64-bit unsigned integer.
+ *
+ * The sign of \a v is discarded (fabs) before anything else happens, so the
+ * caller is responsible for printing it.
+ *
+ * Returns true if |v| is an integer that fits in a uint64_t, in which case
+ * \a *absolute holds that integer. Returns false if |v| is NaN or is 2^64 or
+ * larger (including infinity); \a *absolute is then left untouched. Also
+ * returns false if |v| is in range but has a fractional part; in that case
+ * \a *absolute has been overwritten with the truncated value.
+ */
+static inline bool convertToUint64(double v, uint64_t *absolute)
+{
+    double supremum;
+    v = fabs(v);
+
+    /* C11 standard section 6.3.1.4 "Real floating and integer" says:
+     *
+     *  1 When a finite value of real floating type is converted to an integer
+     *    type other than _Bool, the fractional part is discarded (i.e., the
+     *    value is truncated toward zero). If the value of the integral part
+     *    cannot be represented by the integer type, the behavior is undefined.
+     *
+     * So we must perform a range check that v <= UINT64_MAX, but we can't use
+     * UINT64_MAX + 1.0 because the standard continues:
+     *
+     *  2 When a value of integer type is converted to a real floating type, if
+     *    the value being converted can be represented exactly in the new type,
+     *    it is unchanged. If the value being converted is in the range of
+     *    values that can be represented but cannot be represented exactly, the
+     *    result is either the nearest higher or nearest lower representable
+     *    value, chosen in an implementation-defined manner.
+     */
+    supremum = -2.0 * INT64_MIN;     /* -2 * (- 2^63) == 2^64 */
+
+    /* The check is written as a negated "<" on purpose: every ordered
+     * comparison involving NaN is false, so "v >= supremum" would let NaN
+     * through to the conversion below, which paragraph 1 above only defines
+     * for finite values. */
+    if (!(v < supremum))
+        return false;
+
+    /* Now we can convert: v is finite and 0 <= v < 2^64, so neither this
+     * conversion nor the one implied by the comparison can be UB */
+    *absolute = v;
+    return *absolute == v;
+}
+#endif
+
+/* Writes a fixed marker to the output stream in place of content that was not
+ * descended into because the recursion budget ran out. Whatever the stream
+ * function returns is deliberately not inspected here. */
+static void printRecursionLimit(CborStreamFunction stream, void *out)
+{
+    (void)stream(out, "<nesting too deep, recursion stopped>");
+}
+
+/* Sends each of the \a n bytes starting at \a ptr to the stream as two
+ * lowercase hexadecimal digits, in order. Stops at, and returns, the first
+ * error reported by the stream function; returns CborNoError if every byte was
+ * written (or if \a n is zero). */
+static CborError hexDump(CborStreamFunction stream, void *out, const void *ptr, size_t n)
+{
+    const uint8_t *bytes = (const uint8_t *)ptr;
+    CborError status = CborNoError;
+    size_t i;
+
+    for (i = 0; i < n && !status; ++i)
+        status = stream(out, "%02" PRIx8, bytes[i]);
+
+    return status;
+}
+
+/* Decodes the \a n bytes at \a ptr as UTF-8 and writes them to the stream as
+ * escaped text:
+ *  - printable ASCII (0x20 to 0x7e) other than '"' and '\' is written as-is;
+ *  - '"', '\', backspace, form feed, newline, carriage return and tab are
+ *    written as two-character backslash escapes;
+ *  - everything else is written as \uXXXX, using a UTF-16 surrogate pair for
+ *    codepoints above U+FFFF.
+ * Returns CborErrorInvalidUtf8TextString as soon as get_utf8() reports a
+ * decoding failure, otherwise the first error from the stream function (or
+ * CborNoError). */
+static CborError utf8EscapedDump(CborStreamFunction stream, void *out, const void *ptr, size_t n)
+{
+    const uint8_t *cursor = (const uint8_t *)ptr;
+    const uint8_t * const limit = cursor + n;
+    CborError status = CborNoError;
+
+    while (cursor < limit && !status) {
+        unsigned char shortEscape = 0;  /* stays 0 when no two-character escape applies */
+        uint32_t codepoint = get_utf8(&cursor, limit);
+        if (codepoint == ~0U)
+            return CborErrorInvalidUtf8TextString;
+
+        /* printable ASCII that needs no escaping goes out verbatim */
+        if (codepoint >= 0x20 && codepoint < 0x7f && codepoint != '\\' && codepoint != '"') {
+            status = stream(out, "%c", (char)codepoint);
+            continue;
+        }
+
+        /* all case labels are ASCII, so larger codepoints take the default */
+        switch (codepoint) {
+        case '"':
+        case '\\':
+            shortEscape = (unsigned char)codepoint;
+            break;
+        case '\b':
+            shortEscape = 'b';
+            break;
+        case '\f':
+            shortEscape = 'f';
+            break;
+        case '\n':
+            shortEscape = 'n';
+            break;
+        case '\r':
+            shortEscape = 'r';
+            break;
+        case '\t':
+            shortEscape = 't';
+            break;
+        default:
+            break;
+        }
+
+        if (shortEscape) {
+            status = stream(out, "\\%c", shortEscape);
+        } else if (codepoint > 0xffffU) {
+            /* outside the BMP: split into a surrogate pair */
+            const uint32_t highSurrogate = (codepoint >> 10) + 0xd7c0;
+            const uint32_t lowSurrogate = (codepoint & 0x03ffU) + 0xdc00;
+            status = stream(out, "\\u%04" PRIX32 "\\u%04" PRIX32, highSurrogate, lowSurrogate);
+        } else {
+            /* remaining control characters, DEL and non-ASCII BMP codepoints */
+            status = stream(out, "\\u%04" PRIX32, codepoint);
+        }
+    }
+    return status;
+}
+
+/* Picks the encoding-indicator text for the CBOR item whose initial byte is at
+ * \a ptr (\a end marks the end of the readable data).
+ *
+ * Returns a pointer into a static table:
+ *  - "" when the additional information is below Value8Bit, when the relevant
+ *    flag is not set in \a flags, or when the number uses the shortest
+ *    encoding that can hold it;
+ *  - "_" for an indefinite-length item if CborPrettyIndicateIndeterminateLength
+ *    is set;
+ *  - "_0" to "_3" if CborPrettyIndicateOverlongNumbers is set and the number
+ *    was encoded with a wider additional-information value than its magnitude
+ *    requires.
+ * Returns NULL if \a ptr == \a end or if _cbor_value_extract_number() fails. */
+static const char *resolve_indicator(const uint8_t *ptr, const uint8_t *end, int flags)
+{
+    static const char indicators[8][3] = {
+        "_0", "_1", "_2", "_3",
+        "", "", "",             /* these are not possible */
+        "_"
+    };
+    const char * const blank = indicators[5];   /* empty string */
+    uint8_t info;
+    uint8_t minimalInfo;
+    uint64_t number;
+
+    if (ptr == end)
+        return NULL;    /* CborErrorUnexpectedEOF */
+
+    info = (*ptr & SmallValueMask);
+    if (info < Value8Bit)
+        return blank;
+
+    /* determine whether to show anything */
+    if (info == IndefiniteLength && (flags & CborPrettyIndicateIndeterminateLength))
+        return indicators[IndefiniteLength - Value8Bit];
+    if (!(flags & CborPrettyIndicateOverlongNumbers))
+        return blank;
+
+    if (_cbor_value_extract_number(&ptr, end, &number))
+        return NULL;    /* CborErrorUnexpectedEOF */
+
+    /* start one below Value8Bit and add one for each size threshold the
+     * number reaches; the result is the additional-information value of the
+     * shortest encoding */
+    minimalInfo = (uint8_t)((Value8Bit - 1)
+                            + (number >= Value8Bit)
+                            + (number > 0xffU)
+                            + (number > 0xffffU)
+                            + (number > 0xffffffffU));
+    if (minimalInfo == info)
+        return blank;
+    return indicators[info - Value8Bit];
+}
+
+/* Shorthand for resolve_indicator() applied to the item \a it currently points
+ * at, bounded by the end pointer of its parser. */
+static const char *get_indicator(const CborValue *it, int flags)
+{
+    const uint8_t *first = it->ptr;
+    const uint8_t *limit = it->parser->end;
+
+    return resolve_indicator(first, limit, flags);
+}
+
+static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue *it, int flags, int recursionsLeft);
+
+/* Writes the elements of an already-entered container, separated by ", ".
+ * \a it iterates over the container's contents; \a containerType tells
+ * whether elements are printed one by one (CborArrayType) or as "key: value"
+ * pairs (any other type, i.e. maps).
+ *
+ * If \a recursionsLeft is zero, only the recursion-limit marker is printed,
+ * \a it is not touched and CborNoError is returned. Otherwise the loop runs
+ * until \a it is at the end or an error occurs, and the first error from the
+ * stream function or from value_to_pretty() is returned. */
+static CborError container_to_pretty(CborStreamFunction stream, void *out, CborValue *it, CborType containerType,
+                                     int flags, int recursionsLeft)
+{
+    const bool pairs = containerType != CborArrayType;
+    const char *separator = "";
+    CborError status = CborNoError;
+
+    if (recursionsLeft == 0) {
+        printRecursionLimit(stream, out);
+        return status;      /* do allow the dumping to continue */
+    }
+
+    while (!cbor_value_at_end(it) && !status) {
+        /* nothing before the first element, ", " before every later one */
+        status = stream(out, "%s", separator);
+        separator = ", ";
+
+        /* array element, or map key */
+        if (!status)
+            status = value_to_pretty(stream, out, it, flags, recursionsLeft);
+
+        if (pairs) {
+            /* map: that was the key, so get the value */
+            if (!status)
+                status = stream(out, ": ");
+            if (!status)
+                status = value_to_pretty(stream, out, it, flags, recursionsLeft);
+        }
+    }
+    return status;
+}
+
+/* Writes the text form of the single CBOR item that \a it points to by
+ * calling \a stream with \a out as its first argument, then moves \a it past
+ * that item.
+ *
+ * \a flags is a combination of CborPrettyFlags values. \a recursionsLeft is
+ * the remaining nesting budget: it is decremented when descending into an
+ * array, map or tagged value, and the recursion-limit marker is printed
+ * instead of the nested content once it reaches zero.
+ *
+ * Returns the first error reported by the stream function or by one of the
+ * parser calls. Also returns CborErrorUnknownType for CborInvalidType, and
+ * CborErrorUnsupportedType for floating-point types whose support was
+ * compiled out. \a it is only advanced by the code paths that completed
+ * without error. */
+static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue *it, int flags, int recursionsLeft)
+{
+ CborError err = CborNoError;
+ CborType type = cbor_value_get_type(it);
+ switch (type) {
+ case CborArrayType:
+ case CborMapType: {
+ /* recursive type */
+ CborValue recursed;
+ const char *indicator = get_indicator(it, flags);
+ /* a single space follows a non-empty indicator; otherwise reuse the
+ * empty indicator string so that nothing extra is printed */
+ const char *space = *indicator ? " " : indicator;
+
+ err = stream(out, "%c%s%s", type == CborArrayType ? '[' : '{', indicator, space);
+ if (err)
+ return err;
+
+ err = cbor_value_enter_container(it, &recursed);
+ if (err) {
+ /* NOTE(review): presumably copies the position so the caller can see
+ * how far parsing got before the error - confirm */
+ it->ptr = recursed.ptr;
+ return err; /* parse error */
+ }
+ err = container_to_pretty(stream, out, &recursed, type, flags, recursionsLeft - 1);
+ if (err) {
+ it->ptr = recursed.ptr;
+ return err; /* parse error */
+ }
+ err = cbor_value_leave_container(it, &recursed);
+ if (err)
+ return err; /* parse error */
+
+ return stream(out, type == CborArrayType ? "]" : "}");
+ }
+
+ case CborIntegerType: {
+ uint64_t val;
+ cbor_value_get_raw_integer(it, &val); /* can't fail */
+
+ if (cbor_value_is_unsigned_integer(it)) {
+ err = stream(out, "%" PRIu64, val);
+ } else {
+ /* CBOR stores the negative number X as -1 - X
+ * (that is, -1 is stored as 0, -2 as 1 and so forth) */
+ if (++val) { /* unsigned overflow may happen */
+ err = stream(out, "-%" PRIu64, val);
+ } else {
+ /* overflown
+ * 0xffff`ffff`ffff`ffff + 1 =
+ * 0x1`0000`0000`0000`0000 = 18446744073709551616 (2^64) */
+ err = stream(out, "-18446744073709551616");
+ }
+ }
+ /* the encoding indicator, if any, goes right after the digits */
+ if (!err)
+ err = stream(out, "%s", get_indicator(it, flags));
+ break;
+ }
+
+ case CborByteStringType:
+ case CborTextStringType: {
+ size_t n = 0;
+ const void *ptr;
+ /* fragments are shown individually only if the flag asks for it and the
+ * string's length is not known */
+ bool showingFragments = (flags & CborPrettyShowStringFragments) && !cbor_value_is_length_known(it);
+ const char *separator = "";
+ /* delimiters default to the byte string form h'...' */
+ char close = '\'';
+ char open[3] = "h'";
+ const char *indicator = NULL;
+
+ if (type == CborTextStringType) {
+ /* text strings are delimited by double quotes on both sides */
+ close = open[0] = '"';
+ open[1] = '\0';
+ }
+
+ if (showingFragments) {
+ err = stream(out, "(_ ");
+ if (!err)
+ err = _cbor_value_prepare_string_iteration(it);
+ } else {
+ /* merged output: the opening delimiter is written once, up front */
+ err = stream(out, "%s", open);
+ }
+
+ while (!err) {
+ if (showingFragments || indicator == NULL) {
+ /* any iteration, except the second for a non-chunked string */
+ indicator = resolve_indicator(it->ptr, it->parser->end, flags);
+ }
+
+ /* fetches the next chunk and stores the updated position back in it */
+ err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
+ /* NOTE(review): a null chunk pointer appears to mean there are no more
+ * chunks - confirm against _cbor_value_get_string_chunk() */
+ if (!ptr)
+ break;
+
+ if (!err && showingFragments)
+ err = stream(out, "%s%s", separator, open);
+ if (!err)
+ err = (type == CborByteStringType ?
+ hexDump(stream, out, ptr, n) :
+ utf8EscapedDump(stream, out, ptr, n));
+ if (!err && showingFragments) {
+ /* each fragment is closed and gets its own indicator */
+ err = stream(out, "%c%s", close, indicator);
+ separator = ", ";
+ }
+ }
+
+ if (!err) {
+ if (showingFragments)
+ err = stream(out, ")");
+ else
+ err = stream(out, "%c%s", close, indicator);
+ }
+ /* returns directly, so the common advance step after the switch is not
+ * run for strings */
+ return err;
+ }
+
+ case CborTagType: {
+ CborTag tag;
+ cbor_value_get_tag(it, &tag); /* can't fail */
+ err = stream(out, "%" PRIu64 "%s(", tag, get_indicator(it, flags));
+ /* step over the tag itself so that it points at the tagged item */
+ if (!err)
+ err = cbor_value_advance_fixed(it);
+ if (!err && recursionsLeft)
+ err = value_to_pretty(stream, out, it, flags, recursionsLeft - 1);
+ else if (!err)
+ printRecursionLimit(stream, out);
+ if (!err)
+ err = stream(out, ")");
+ return err;
+ }
+
+ case CborSimpleType: {
+ /* simple types can't fail and can't have overlong encoding */
+ uint8_t simple_type;
+ cbor_value_get_simple_type(it, &simple_type);
+ err = stream(out, "simple(%" PRIu8 ")", simple_type);
+ break;
+ }
+
+ case CborNullType:
+ err = stream(out, "null");
+ break;
+
+ case CborUndefinedType:
+ err = stream(out, "undefined");
+ break;
+
+ case CborBooleanType: {
+ bool val;
+ cbor_value_get_boolean(it, &val); /* can't fail */
+ err = stream(out, val ? "true" : "false");
+ break;
+ }
+
+#ifndef CBOR_NO_FLOATING_POINT
+ case CborDoubleType: {
+ const char *suffix;
+ double val;
+ int r;
+ uint64_t ival;
+
+ /* The "if (false)" bodies below are never entered through the if
+ * itself. They are reached only through the case labels placed inside
+ * them, which lets the float and half-float cases share the locals above
+ * and the printing code after this chain. Entering at CborDoubleType
+ * falls through both conditions to the final else. */
+ if (false) {
+ float f;
+ case CborFloatType:
+ cbor_value_get_float(it, &f);
+ val = f;
+ suffix = flags & CborPrettyNumericEncodingIndicators ? "_2" : "f";
+ } else if (false) {
+ uint16_t f16;
+ case CborHalfFloatType:
+#ifndef CBOR_NO_HALF_FLOAT_TYPE
+ cbor_value_get_half_float(it, &f16);
+ val = decode_half(f16);
+ suffix = flags & CborPrettyNumericEncodingIndicators ? "_1" : "f16";
+#else
+ (void)f16;
+ err = CborErrorUnsupportedType;
+ break;
+#endif
+ } else {
+ cbor_value_get_double(it, &val);
+ suffix = "";
+ }
+
+ /* with textual indicators, NaN and infinities are printed without a
+ * type suffix */
+ if ((flags & CborPrettyNumericEncodingIndicators) == 0) {
+ r = fpclassify(val);
+ if (r == FP_NAN || r == FP_INFINITE)
+ suffix = "";
+ }
+
+ if (convertToUint64(val, &ival)) {
+ /* this double value fits in a 64-bit integer, so show it as such
+ * (followed by a floating point suffix, to disambiguate) */
+ err = stream(out, "%s%" PRIu64 ".%s", val < 0 ? "-" : "", ival, suffix);
+ } else {
+ /* this number is definitely not a 64-bit integer */
+ err = stream(out, "%." DBL_DECIMAL_DIG_STR "g%s", val, suffix);
+ }
+ break;
+ }
+#else
+ case CborDoubleType:
+ case CborFloatType:
+ case CborHalfFloatType:
+ err = CborErrorUnsupportedType;
+ break;
+#endif /* !CBOR_NO_FLOATING_POINT */
+
+ case CborInvalidType:
+ /* a stream error takes precedence over the unknown-type error */
+ err = stream(out, "invalid");
+ if (err)
+ return err;
+ return CborErrorUnknownType;
+ }
+
+ /* common tail for every case that left the switch with break: step over the
+ * fixed-size item that was just printed */
+ if (!err)
+ err = cbor_value_advance_fixed(it);
+ return err;
+}
+
+/**
+ * Produces the textual representation of the CBOR element that \a value
+ * currently points to and emits it through calls to \a streamFunction. If the
+ * conversion fails, the returned error code is similar to those of
+ * \ref CborParsing.
+ *
+ * The \a flags parameter selects details of the textual representation (see
+ * \ref CborPrettyFlags for more information).
+ *
+ * If no error occurred, \a value is advanced to the next element. The text of
+ * several consecutive elements can often be joined by writing a comma to the
+ * output stream between calls to this function.
+ *
+ * \a streamFunction is called with \a token as its first argument and a
+ * printf-style format string as its second, followed by a variable number of
+ * further arguments.
+ *
+ * \sa cbor_value_to_pretty(), cbor_value_to_json_advance()
+ */
+CborError cbor_value_to_pretty_stream(CborStreamFunction streamFunction, void *token, CborValue *value, int flags)
+{
+    /* start the conversion with the full nesting budget */
+    const int depthBudget = CBOR_PARSER_MAX_RECURSIONS;
+
+    return value_to_pretty(streamFunction, token, value, flags, depthBudget);
+}
+
+/** @} */