/* -----------------------------------------------------
*
* File: NumberUnitSplitter.cs
* Author: sven.kopacz@keysight.com
* Created: 21.10.2016
*
* -----------------------------------------------------
*
* Description: See class summary
*
* -----------------------------------------------------
*/
using System;
using System.Collections.Generic;
using System.Globalization;
namespace OpenTap
{
/// <summary>
/// A number unit splitter can split a text into numbers and units using whitespace and changes
/// from digits to characters as splitting points.
/// </summary>
public static class NumberUnitSplitter
{
    /// <summary>
    /// Splits a string into pairs of numbers and units. The units are not evaluated in any way but
    /// just returned as strings. Separation points are whitespace and any switch from digits to
    /// characters and vice versa. Dots, commas etc. are considered to belong to numbers, as is the
    /// character 'e' (or 'E') when it is followed by a sign or digit; an 'e' followed by anything
    /// else is treated as the start of the unit (so "5eV" yields 5 with the unit "eV").
    /// You can provide parsing parameters though that may render such a number invalid.
    /// For null, empty or whitespace-only input strings, an empty list is returned.
    /// </summary>
    /// <param name="text">The text.</param>
    /// <param name="numberParseStyles">The accepted styles for number parsing.</param>
    /// <param name="numberParseCulture">The culture for number parsing. If omitted,
    /// <see cref="CultureInfo.DefaultThreadCurrentCulture"/> is passed to the parser; if that is
    /// not set either, <see cref="double.Parse(string, NumberStyles, IFormatProvider)"/> uses the
    /// current culture.</param>
    /// <returns>A list of (number, unit) tuples in the order they appear in the text. The unit is
    /// an empty string when a number is not followed by unit characters.</returns>
    /// <exception cref="FormatException">A number is missing where one is expected, or the
    /// extracted number text cannot be parsed with the given styles and culture.</exception>
    public static List<Tuple<double, string>> Split(string text, NumberStyles numberParseStyles = NumberStyles.Any, CultureInfo numberParseCulture = null)
    {
        List<Tuple<double, string>> result = new List<Tuple<double, string>>();
        IFormatProvider formatProvider = numberParseCulture ?? CultureInfo.DefaultThreadCurrentCulture;

        while (!string.IsNullOrWhiteSpace(text))
        {
            // Extract number (the returned string includes any leading whitespace, so its
            // length is also the index in text where the number ends).
            string number = GetLeadingDigits(text, numberParseStyles);
            if (string.IsNullOrWhiteSpace(number))
            {
                throw new FormatException("Missing number");
            }

            // Skip whitespace between number and unit
            int i = number.Length;
            while (i < text.Length && char.IsWhiteSpace(text[i]))
            {
                i++;
            }

            // Extract unit
            int unitStartIndex = i;
            while (i < text.Length && IsUnitChar(text[i]))
            {
                i++;
            }

            string unit = text.Substring(unitStartIndex, i - unitStartIndex);
            double value = double.Parse(number, numberParseStyles, formatProvider);
            result.Add(Tuple.Create(value, unit));

            if (i >= text.Length)
            {
                break;
            }

            // Continue with whatever follows the unit.
            text = text.Substring(i);
        }

        return result;
    }

    /// <summary>
    /// Determines whether a character is considered part of a unit.
    /// Accepted are a-z, A-Z, '+', '-' and everything with a code above 127. A char in .NET is a
    /// 16 bit unicode character, so a unit may contain something like a leading "µ", a "£" or
    /// "°C". All other ASCII characters (digits, whitespace, punctuation) end the unit.
    /// </summary>
    /// <param name="c">The character to classify.</param>
    /// <returns>true if the character can be part of a unit.</returns>
    private static bool IsUnitChar(char c)
    {
        return (c >= 'A' && c <= 'Z')
            || (c >= 'a' && c <= 'z')
            || c == '+'
            || c == '-'
            || c >= 128;
    }

    /// <summary>
    /// Retrieves the leading part of the string that contains digits only and an optional set of
    /// formatting signs. Note that the returned string may even consist of formatting signs only
    /// like ",.-+e+" so you should use the .NET type parser on the result to determine if it is
    /// an actual number.
    /// </summary>
    /// <param name="text">The string to parse.</param>
    /// <param name="numberParseStyles">This can be configured to accept the features decimal
    /// delimiter, leading sign and exponent.</param>
    /// <param name="includeLeadingWhiteSpace">true to include leading white space characters.</param>
    /// <returns>The leading digits.</returns>
    private static string GetLeadingDigits(string text, NumberStyles numberParseStyles = NumberStyles.Any, bool includeLeadingWhiteSpace = true)
    {
        bool acceptDecimalDelimiter = (numberParseStyles & (NumberStyles.AllowDecimalPoint | NumberStyles.AllowThousands)) != 0;
        bool acceptLeadingSign = (numberParseStyles & NumberStyles.AllowLeadingSign) != 0;
        bool acceptExponent = (numberParseStyles & NumberStyles.AllowExponent) != 0;

        // A sign is allowed until the first digit and again right after an exponent character.
        bool nextCharAllowsSign = true;
        // Whitespace is only accepted while nothing but whitespace has been accepted so far.
        bool onlyWhiteSpaceSoFar = true;
        // Number of accepted characters; they always form a prefix of text.
        int length = 0;

        for (; length < text.Length; length++)
        {
            char c = text[length];
            bool acceptedChar;

            if (c >= '0' && c <= '9')
            {
                acceptedChar = true;
                nextCharAllowsSign = false;
            }
            else if (c == '+' || c == '-')
            {
                acceptedChar = nextCharAllowsSign && acceptLeadingSign;
            }
            else if (c == '.' || c == ',')
            {
                acceptedChar = acceptDecimalDelimiter;
            }
            else if (c == 'e' || c == 'E')
            {
                // Only an 'e' that is followed by a sign or digit can be an exponent; otherwise
                // it is the first character of a unit such as "eV" and must not be swallowed.
                acceptedChar = acceptExponent && IsSignOrDigitAt(text, length + 1);
                if (acceptedChar)
                {
                    nextCharAllowsSign = true;
                }
            }
            else if (char.IsWhiteSpace(c))
            {
                acceptedChar = includeLeadingWhiteSpace && onlyWhiteSpaceSoFar;
            }
            else
            {
                acceptedChar = false;
            }

            if (!acceptedChar)
            {
                break;
            }

            if (!char.IsWhiteSpace(c))
            {
                onlyWhiteSpaceSoFar = false;
            }
        }

        return text.Substring(0, length);
    }

    /// <summary>
    /// Checks whether the character at the given index exists and is a decimal digit (0-9) or a
    /// sign ('+' or '-').
    /// </summary>
    /// <param name="text">The string to inspect.</param>
    /// <param name="index">The index to look at; may be past the end of the string.</param>
    /// <returns>true if there is a sign or digit at the index.</returns>
    private static bool IsSignOrDigitAt(string text, int index)
    {
        if (index >= text.Length)
        {
            return false;
        }

        char c = text[index];
        return (c >= '0' && c <= '9') || c == '+' || c == '-';
    }
}
}