float16.go 3.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. package utils
  2. import (
  3. "bytes"
  4. "io"
  5. "math"
  6. )
  7. // We define an unsigned 16-bit floating point value, inspired by IEEE floats
  8. // (http://en.wikipedia.org/wiki/Half_precision_floating-point_format),
  9. // with 5-bit exponent (bias 1), 11-bit mantissa (effective 12 with hidden
  10. // bit) and denormals, but without signs, transfinites or fractions. Wire format
  11. // 16 bits (little-endian byte order) are split into exponent (high 5) and
  12. // mantissa (low 11) and decoded as:
  13. // uint64_t value;
  14. // if (exponent == 0) value = mantissa;
  15. // else value = (mantissa | 1 << 11) << (exponent - 1)
  16. const uFloat16ExponentBits = 5
  17. const uFloat16MaxExponent = (1 << uFloat16ExponentBits) - 2 // 30
  18. const uFloat16MantissaBits = 16 - uFloat16ExponentBits // 11
  19. const uFloat16MantissaEffectiveBits = uFloat16MantissaBits + 1 // 12
  20. const uFloat16MaxValue = ((uint64(1) << uFloat16MantissaEffectiveBits) - 1) << uFloat16MaxExponent // 0x3FFC0000000
  21. // readUfloat16 reads a float in the QUIC-float16 format and returns its uint64 representation
  22. func readUfloat16(b io.ByteReader, byteOrder ByteOrder) (uint64, error) {
  23. val, err := byteOrder.ReadUint16(b)
  24. if err != nil {
  25. return 0, err
  26. }
  27. res := uint64(val)
  28. if res < (1 << uFloat16MantissaEffectiveBits) {
  29. // Fast path: either the value is denormalized (no hidden bit), or
  30. // normalized (hidden bit set, exponent offset by one) with exponent zero.
  31. // Zero exponent offset by one sets the bit exactly where the hidden bit is.
  32. // So in both cases the value encodes itself.
  33. return res, nil
  34. }
  35. exponent := val >> uFloat16MantissaBits // No sign extend on uint!
  36. // After the fast pass, the exponent is at least one (offset by one).
  37. // Un-offset the exponent.
  38. exponent--
  39. // Here we need to clear the exponent and set the hidden bit. We have already
  40. // decremented the exponent, so when we subtract it, it leaves behind the
  41. // hidden bit.
  42. res -= uint64(exponent) << uFloat16MantissaBits
  43. res <<= exponent
  44. return res, nil
  45. }
  46. // writeUfloat16 writes a float in the QUIC-float16 format from its uint64 representation
  47. func writeUfloat16(b *bytes.Buffer, byteOrder ByteOrder, value uint64) {
  48. var result uint16
  49. if value < (uint64(1) << uFloat16MantissaEffectiveBits) {
  50. // Fast path: either the value is denormalized, or has exponent zero.
  51. // Both cases are represented by the value itself.
  52. result = uint16(value)
  53. } else if value >= uFloat16MaxValue {
  54. // Value is out of range; clamp it to the maximum representable.
  55. result = math.MaxUint16
  56. } else {
  57. // The highest bit is between position 13 and 42 (zero-based), which
  58. // corresponds to exponent 1-30. In the output, mantissa is from 0 to 10,
  59. // hidden bit is 11 and exponent is 11 to 15. Shift the highest bit to 11
  60. // and count the shifts.
  61. exponent := uint16(0)
  62. for offset := uint16(16); offset > 0; offset /= 2 {
  63. // Right-shift the value until the highest bit is in position 11.
  64. // For offset of 16, 8, 4, 2 and 1 (binary search over 1-30),
  65. // shift if the bit is at or above 11 + offset.
  66. if value >= (uint64(1) << (uFloat16MantissaBits + offset)) {
  67. exponent += offset
  68. value >>= offset
  69. }
  70. }
  71. // Hidden bit (position 11) is set. We should remove it and increment the
  72. // exponent. Equivalently, we just add it to the exponent.
  73. // This hides the bit.
  74. result = (uint16(value) + (exponent << uFloat16MantissaBits))
  75. }
  76. byteOrder.WriteUint16(b, result)
  77. }