core/num/dec2flt/parse.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
//! Functions to parse floating-point numbers.
use crate::num::dec2flt::common::{ByteSlice, is_8digits};
use crate::num::dec2flt::float::RawFloat;
use crate::num::dec2flt::number::Number;
const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
/// Parse 8 digits, loaded as bytes in little-endian order.
///
/// This uses the trick where every digit is in [0x030, 0x39],
/// and therefore can be parsed in 3 multiplications, much
/// faster than the normal 8.
///
/// This is based off the algorithm described in "Fast numeric string to
/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
fn parse_8digits(mut v: u64) -> u64 {
const MASK: u64 = 0x0000_00FF_0000_00FF;
const MUL1: u64 = 0x000F_4240_0000_0064;
const MUL2: u64 = 0x0000_2710_0000_0001;
v -= 0x3030_3030_3030_3030;
v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
let v1 = (v & MASK).wrapping_mul(MUL1);
let v2 = ((v >> 16) & MASK).wrapping_mul(MUL2);
((v1.wrapping_add(v2) >> 32) as u32) as u64
}
/// Parse digits until a non-digit character is found.
fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) {
// may cause overflows, to be handled later
while s.len() >= 8 {
let num = s.read_u64();
if is_8digits(num) {
x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num));
s = &s[8..];
} else {
break;
}
}
s = s.parse_digits(|digit| {
x = x.wrapping_mul(10).wrapping_add(digit as _);
});
(s, x)
}
/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) {
let mut s = *s_ref;
while *x < MIN_19DIGIT_INT {
if let Some((c, s_next)) = s.split_first() {
let digit = c.wrapping_sub(b'0');
if digit < 10 {
*x = (*x * 10) + digit as u64; // no overflows here
s = s_next;
} else {
break;
}
} else {
break;
}
}
*s_ref = s;
}
/// Parse the scientific notation component of a float.
fn parse_scientific(s_ref: &mut &[u8]) -> Option<i64> {
let mut exponent = 0i64;
let mut negative = false;
let mut s = *s_ref;
if let Some((&c, s_next)) = s.split_first() {
negative = c == b'-';
if c == b'-' || c == b'+' {
s = s_next;
}
}
if matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
*s_ref = s.parse_digits(|digit| {
// no overflows here, saturate well before overflow
if exponent < 0x10000 {
exponent = 10 * exponent + digit as i64;
}
});
if negative { Some(-exponent) } else { Some(exponent) }
} else {
*s_ref = s;
None
}
}
/// Parse a partial, non-special floating point number.
///
/// This creates a representation of the float as the
/// significant digits and the decimal exponent.
fn parse_partial_number(mut s: &[u8]) -> Option<(Number, usize)> {
debug_assert!(!s.is_empty());
// parse initial digits before dot
let mut mantissa = 0_u64;
let start = s;
let tmp = try_parse_digits(s, mantissa);
s = tmp.0;
mantissa = tmp.1;
let mut n_digits = s.offset_from(start);
// handle dot with the following digits
let mut n_after_dot = 0;
let mut exponent = 0_i64;
let int_end = s;
if let Some((&b'.', s_next)) = s.split_first() {
s = s_next;
let before = s;
let tmp = try_parse_digits(s, mantissa);
s = tmp.0;
mantissa = tmp.1;
n_after_dot = s.offset_from(before);
exponent = -n_after_dot as i64;
}
n_digits += n_after_dot;
if n_digits == 0 {
return None;
}
// handle scientific format
let mut exp_number = 0_i64;
if let Some((&c, s_next)) = s.split_first() {
if c == b'e' || c == b'E' {
s = s_next;
// If None, we have no trailing digits after exponent, or an invalid float.
exp_number = parse_scientific(&mut s)?;
exponent += exp_number;
}
}
let len = s.offset_from(start) as _;
// handle uncommon case with many digits
if n_digits <= 19 {
return Some((Number { exponent, mantissa, negative: false, many_digits: false }, len));
}
n_digits -= 19;
let mut many_digits = false;
let mut p = start;
while let Some((&c, p_next)) = p.split_first() {
if c == b'.' || c == b'0' {
n_digits -= c.saturating_sub(b'0' - 1) as isize;
p = p_next;
} else {
break;
}
}
if n_digits > 0 {
// at this point we have more than 19 significant digits, let's try again
many_digits = true;
mantissa = 0;
let mut s = start;
try_parse_19digits(&mut s, &mut mantissa);
exponent = if mantissa >= MIN_19DIGIT_INT {
// big int
int_end.offset_from(s)
} else {
s = &s[1..];
let before = s;
try_parse_19digits(&mut s, &mut mantissa);
-s.offset_from(before)
} as i64;
// add back the explicit part
exponent += exp_number;
}
Some((Number { exponent, mantissa, negative: false, many_digits }, len))
}
/// Try to parse a non-special floating point number,
/// as well as two slices with integer and fractional parts
/// and the parsed exponent.
pub fn parse_number(s: &[u8]) -> Option<Number> {
if let Some((float, rest)) = parse_partial_number(s) {
if rest == s.len() {
return Some(float);
}
}
None
}
/// Try to parse a special, non-finite float.
pub(crate) fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
// Since a valid string has at most the length 8, we can load
// all relevant characters into a u64 and work from there.
// This also generates much better code.
let mut register;
let len: usize;
// All valid strings are either of length 8 or 3.
if s.len() == 8 {
register = s.read_u64();
len = 8;
} else if s.len() == 3 {
let a = s[0] as u64;
let b = s[1] as u64;
let c = s[2] as u64;
register = (c << 16) | (b << 8) | a;
len = 3;
} else {
return None;
}
// Clear out the bits which turn ASCII uppercase characters into
// lowercase characters. The resulting string is all uppercase.
// What happens to other characters is irrelevant.
register &= 0xDFDFDFDFDFDFDFDF;
// u64 values corresponding to relevant cases
const INF_3: u64 = 0x464E49; // "INF"
const INF_8: u64 = 0x5954494E49464E49; // "INFINITY"
const NAN: u64 = 0x4E414E; // "NAN"
// Match register value to constant to parse string.
// Also match on the string length to catch edge cases
// like "inf\0\0\0\0\0".
let float = match (register, len) {
(INF_3, 3) => F::INFINITY,
(INF_8, 8) => F::INFINITY,
(NAN, 3) => F::NAN,
_ => return None,
};
if negative { Some(-float) } else { Some(float) }
}