Version: SMASH-3.1
smash::utf8 Namespace Reference

Functions

std::string fill_left (const std::string &s, size_t width, char fill=' ')
 Fill string with characters to the left until the given width is reached. More...
 
std::string fill_right (const std::string &s, size_t width, char fill=' ')
 Fill string with characters to the right until the given width is reached. More...
 
std::string fill_both (const std::string &s, size_t width, char fill=' ')
 Fill string with characters at both sides until the given width is reached. More...
 
template<typename octet_type >
uint8_t mask8 (octet_type oc)
 Extract the first byte from a given value. More...
 
template<typename octet_iterator >
std::iterator_traits< octet_iterator >::difference_type sequence_length (octet_iterator lead_it)
 Given an iterator to the beginning of a UTF-8 sequence, return the length of the next UTF-8 code point. More...
 
static size_t adjust (const std::string &s, size_t width)
 Adjust filling width by taking the size of unicode characters into account. More...
 

Function Documentation

◆ fill_left()

std::string smash::utf8::fill_left ( const std::string &  s,
size_t  width,
char  fill = ' ' 
)

Fill string with characters to the left until the given width is reached.

Parameters
[in] s: Input string.
[in] width: Total width of output string.
[in] fill: Filling character.
Returns
Padded string.

Definition at line 48 of file stringfunctions.cc.

48  {
49  width = adjust(s, width - s.size());
50  if (width > 0) {
51  return std::string(width, fill) + s;
52  }
53  return s;
54 }
static size_t adjust(const std::string &s, size_t width)
Adjust filling width by taking the size of unicode characters into account.

◆ fill_right()

std::string smash::utf8::fill_right ( const std::string &  s,
size_t  width,
char  fill = ' ' 
)

Fill string with characters to the right until the given width is reached.

Parameters
[in] s: Input string.
[in] width: Total width of output string.
[in] fill: Filling character.
Returns
Padded string.

Definition at line 56 of file stringfunctions.cc.

56  {
57  width = adjust(s, width - s.size());
58  if (width > 0) {
59  return s + std::string(width, fill);
60  }
61  return s;
62 }

◆ fill_both()

std::string smash::utf8::fill_both ( const std::string &  s,
size_t  width,
char  fill = ' ' 
)

Fill string with characters at both sides until the given width is reached.

Parameters
[in] s: Input string.
[in] width: Total width of output string.
[in] fill: Filling character.
Returns
Padded string.

Definition at line 64 of file stringfunctions.cc.

64  {
65  width = adjust(s, width - s.size());
66  if (width > 0) {
67  const int l = width / 2;
68  const int r = width - l;
69  return std::string(l, fill) + s + std::string(r, fill);
70  }
71  return s;
72 }

◆ mask8()

template<typename octet_type >
uint8_t smash::utf8::mask8 ( octet_type  oc)
inline

Extract the first (least significant) byte from a given value.

This function was taken from the Boost-licensed library UTF8-CPP. See http://utfcpp.sourceforge.net/.

Template Parameters
octet_type: Type for one byte

Definition at line 110 of file stringfunctions.h.

110  {
111  return static_cast<uint8_t>(0xff & oc);
112 }

◆ sequence_length()

template<typename octet_iterator >
std::iterator_traits<octet_iterator>::difference_type smash::utf8::sequence_length ( octet_iterator  lead_it)
inline

Given an iterator to the beginning of a UTF-8 sequence, return the length of the next UTF-8 code point.

This function was taken from the Boost-licensed library UTF8-CPP. See http://utfcpp.sourceforge.net/.

Definition at line 123 of file stringfunctions.h.

123  {
124  uint8_t lead = mask8(*lead_it);
125  if (lead < 0x80)
126  return 1;
127  else if ((lead >> 5) == 0x6)
128  return 2;
129  else if ((lead >> 4) == 0xe)
130  return 3;
131  else if ((lead >> 3) == 0x1e)
132  return 4;
133  else
134  return 0;
135 }
uint8_t mask8(octet_type oc)
Extract the first byte from a given value.

◆ adjust()

static size_t smash::utf8::adjust ( const std::string &  s,
size_t  width 
)
inline static

Adjust filling width by taking the size of unicode characters into account.

This is necessary because UTF-8 characters can be represented by more than one byte.

Parameters
[in] s: String to be filled.
[in] width: Width (in bytes) to be adjusted.
Returns
Adjusted width.

Definition at line 28 of file stringfunctions.cc.

28  {
29  for (unsigned char c : s) {
30  if (c >= 0xFC) {
31  width += 5;
32  } else if (c >= 0xF8) {
33  width += 4;
34  } else if (c >= 0xF0) {
35  width += 3;
36  } else if (c >= 0xE0) {
37  width += 2;
38  } else if (c == 0xCC || c == 0xCD) {
39  // combining character (2 Bytes) - doesn't appear at all
40  width += 2;
41  } else if (c >= 0xC0) {
42  width += 1;
43  }
44  }
45  return width;
46 }