rapidyaml 0.15.2
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
Base64 encoding/decoding

Topics

 Base64 format specifiers

Functions

bool c4::base64_valid (const char *encoded, size_t encoded_sz)
 check that the given buffer is a valid base64 encoding
size_t c4::base64_encode (char *encoded, size_t encoded_sz, void const *data, size_t data_sz)
 base64-encode binary data.
bool c4::base64_decode (char const *encoded, size_t encoded_sz, void *data, size_t data_sz, size_t *data_sz_required)
 decode the base64 encoding in the given buffer.

Detailed Description

See also
https://en.wikipedia.org/wiki/Base64
https://www.base64encode.org/

Function Documentation

◆ base64_valid()

bool c4::base64_valid ( const char * encoded,
size_t encoded_sz )

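check whether the given buffer is a valid base64 encoding. An empty buffer (encoded_sz == 0) is considered valid; a non-empty buffer whose size is not a multiple of 4 is invalid.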

See also
https://en.wikipedia.org/wiki/Base64

Definition at line 332 of file base64.cpp.

333{
334 if(!encoded_sz)
335 return true;
336 if((encoded_sz & size_t(3u))) // is it not a multiple of 4?
337 return false;
338 const char *C4_RESTRICT encoded = encoded_;
339 size_t i = 0;
340 #if C4_WORDSIZE >= 8
341 for( ; i + 8 < encoded_sz; i += 8)
342 if(!is_valid_encoded_group8_(encoded + i))
343 return false;
344 #endif
345 for( ; i + 4 < encoded_sz; i += 4)
346 if(!is_valid_encoded_group4_(encoded + i))
347 return false;
348 if(!is_valid_encoded_char_(encoded[i])
349 || !is_valid_encoded_char_(encoded[i + 1]))
350 return false;
351 if(!is_valid_encoded_char_(encoded[i + 2]))
352 return (encoded[i + 2] == '=' && encoded[i + 3] == '=');
353 if(!is_valid_encoded_char_(encoded[i + 3]))
354 return (encoded[i + 3] == '=');
355 return true;
356}

Referenced by base64_valid().

◆ base64_encode()

size_t c4::base64_encode ( char * encoded,
size_t encoded_sz,
void const * data,
size_t data_sz )

base64-encode binary data.

This is a plain implementation with a focus on simplicity and small footprint, such that it runs reasonably well on constrained platforms. On larger platforms it is reasonably fast (reaching 3GB/s and over), but it is not the fastest. If ultimate base64 speed on x64 platforms is your objective, there are faster implementations available. One recommendation is https://github.com/aklomp/base64, which uses a larger Look-Up Table (4096B as compared with 64B in c4core), making it between 1.5x~2x faster than c4core for larger payloads (but also slower for small payloads), and much faster when using AVX2 or AVX512 processing. But this speed comes at a cost on constrained platforms: e.g. c4core encodes ~2.5x faster on armv4 and armv5.

Parameters
encoded[out] output buffer for encoded data
encoded_sz[in] size of the output buffer for encoded data
data[in] the input buffer with the binary data
data_sz[in] size of the input buffer with the binary data
Returns
the number of bytes required for the output buffer. No writes occur beyond the end of the output buffer, so it is safe to do a speculative call where the encoded buffer is empty, or maybe too small. The caller should ensure that the returned size is not larger than the size of the encoded buffer (encoded_sz); if it is larger, nothing was written to the output buffer.
Note
the result depends on endianness. If transfer between little/big endian systems is desired, the caller should normalize data before encoding.
See also
https://en.wikipedia.org/wiki/Base64

Definition at line 361 of file base64.cpp.

362{
363 C4_ASSERT(encoded_ != nullptr || encoded_sz == 0);
364 C4_ASSERT(data_ != nullptr || data_sz == 0);
365 // ....................... how many groups of 3 bytes to read
366 // .... each group results in 4 bytes written
367 size_t required_sz = ((data_sz + 3 - 1) / 3) * 4;
368 if(encoded_sz < required_sz)
369 return required_sz;
370 size_t rem = data_sz;
371 char *C4_RESTRICT encoded = encoded_;
372 const uint8_t *C4_RESTRICT data = (const uint8_t *) data_; // cast to unsigned to avoid wrapping high-bits
373#if (C4_WORDSIZE >= 8)
374 for( ; rem >= 15; rem -= 12) // leave 3 at the end (15=12+3)
375 {
376 base64_encode_block64_(data, encoded); data += 6; encoded += 8;
377 base64_encode_block64_(data, encoded); data += 6; encoded += 8;
378 }
379 for( ; rem >= 9; rem -= 6) // leave 3 at the end (9=6+3)
380 {
381 base64_encode_block64_(data, encoded); data += 6; encoded += 8;
382 }
383#else
384 for( ; rem >= 15; rem -= 12) // leave 3 at the end (15=12+3)
385 {
386 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
387 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
388 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
389 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
390 }
391 for( ; rem >= 9; rem -= 6) // leave 3 at the end (9=6+3)
392 {
393 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
394 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
395 }
396#endif
397 for( ; rem >= 3; rem -= 3)
398 {
399 base64_encode_block32_(data, encoded); data += 3; encoded += 4;
400 }
401 C4_ASSERT(rem < 3);
402 if(rem == 2)
403 base64_encode_block32_term2_(data, encoded);
404 else if(rem == 1)
405 base64_encode_block32_term1_(data, encoded);
406 return required_sz;
407}

Referenced by base64_encode(), to_chars(), and to_chars().

◆ base64_decode()

bool c4::base64_decode ( char const * encoded,
size_t encoded_sz,
void * data,
size_t data_sz,
size_t * data_sz_required )

decode the base64 encoding in the given buffer.

This is a plain implementation with a focus on simplicity and small footprint, such that it runs reasonably well on constrained platforms. On larger platforms it is reasonably fast, but it is not the fastest. If ultimate base64 speed on x64 platforms is your objective, there are faster implementations available. One recommendation is https://github.com/aklomp/base64, which uses up to 16x larger Look-Up Tables, making it between 1.5x~2x faster than c4core (but also slower for small payloads), and much faster when using AVX2 or AVX512 processing. But this x64 speed comes at a cost on constrained platforms: e.g. c4core decodes ~4x faster on armv4 and armv5.

Parameters
encoded[in] the encoded base64
encoded_sz[in] the size of the encoded buffer
data[out] the output decoded buffer
data_sz[in] the size of the output decoded buffer
data_sz_required[out] the size required for the output decoded buffer, i.e. the number of bytes needed to hold the decoded output (the required size for data). No writes occur beyond the end of the output buffer, so it is safe to do a speculative call where the data buffer is empty, or maybe too small. The caller should ensure that this value is not larger than data_sz. Note that this value is not set when encoded_sz is non-zero and not a multiple of 4; in that case the function returns false before computing it.
Returns
false if the encoding was invalid or the output buffer was too small (data_sz smaller than the required size), and true otherwise.
Note
the result depends on endianness. If transfer between little/big endian systems is desired, the caller should normalize data after decoding.
See also
https://en.wikipedia.org/wiki/Base64

Definition at line 412 of file base64.cpp.

415{
416 C4_ASSERT(encoded_ != nullptr || encoded_sz == 0);
417 C4_ASSERT(data_ != nullptr || data_sz == 0);
418 C4_ASSERT(data_sz_required != nullptr);
419 if(!encoded_sz)
420 {
421 *data_sz_required = 0;
422 return true;
423 }
424 else if(encoded_sz & 3u) // is encoded_sz not a multiple of 4?
425 {
426 return false;
427 }
428 // compute the required size for the decoded buffer:
429 // ................ how many 4-byte groups of encoded data to decode
430 // .... each group results in 3 decoded bytes
431 *data_sz_required = (encoded_sz / 4) * 3;
432 const char *C4_RESTRICT encoded = encoded_;
433 // account for padded bytes at the end
434 C4_ASSERT(encoded_sz >= 4);
435 if(encoded[encoded_sz - 1] == '=')
436 {
437 C4_ASSERT(*data_sz_required >= 3);
438 if(encoded[encoded_sz - 2] == '=')
439 *data_sz_required -= 2;
440 else
441 *data_sz_required -= 1;
442 }
443 if(data_sz < *data_sz_required)
444 return false;
445 // we have enough room
446 size_t rem = *data_sz_required; // numbytes remaining to write
447 dectype *C4_RESTRICT data = (dectype *)data_;
448 C4_STATIC_ASSERT(sizeof(dectype) == 1);
449#if (C4_WORDSIZE >= 8)
450 for( ; rem >= 15; rem -= 12)
451 {
452 if C4_UNLIKELY(!is_valid_encoded_group16_(encoded, 16))
453 return false;
454 base64_decode_block64_(encoded, data); encoded += 8; data += 6;
455 base64_decode_block64_(encoded, data); encoded += 8; data += 6;
456 }
457 for( ; rem >= 9; rem -= 6)
458 {
459 if C4_UNLIKELY(!is_valid_encoded_group8_(encoded))
460 return false;
461 base64_decode_block64_(encoded, data); encoded += 8; data += 6;
462 }
463#else
464 for( ; rem >= 9; rem -= 6)
465 {
466 if C4_UNLIKELY(!is_valid_encoded_group8_(encoded))
467 return false;
468 base64_decode_block32_(encoded, data); encoded += 4; data += 3;
469 base64_decode_block32_(encoded, data); encoded += 4; data += 3;
470 }
471#endif
472 for( ; rem >= 3; rem -= 3)
473 {
474 if C4_UNLIKELY(!is_valid_encoded_group4_(encoded))
475 return false;
476 base64_decode_block32_(encoded, data); encoded += 4; data += 3;
477 }
478 C4_ASSERT(rem < 3);
479 // the last quartet requires dealing with padded chars
480 if(rem == 1) // 1 remaining byte, 2 padding chars
481 {
482 if(!is_valid_encoded_char_(encoded[0])
483 || !is_valid_encoded_char_(encoded[1])
484 || encoded[2] != '='
485 || encoded[3] != '=')
486 return false;
487 base64_decode_block32_term1_(encoded, data);
488 }
489 else if(rem == 2) // 2 remaining bytes, 1 padding char
490 {
491 if(!is_valid_encoded_char_(encoded[0])
492 || !is_valid_encoded_char_(encoded[1])
493 || !is_valid_encoded_char_(encoded[2])
494 || encoded[3] != '=')
495 return false;
496 base64_decode_block32_term2_(encoded, data);
497 }
498 return true;
499}

Referenced by base64_decode(), from_chars(), and from_chars().