[Branch,~jesse-barker/libmatrix/trunk] Rev 39: Merge of lp:~linaro-graphics-wg/libmatrix/split-refactor

Message ID 20120809211311.21705.71214.launchpad@ackee.canonical.com
State Accepted
Headers show

Commit Message

Jesse Barker Aug. 9, 2012, 9:13 p.m.
Merge authors:
  Alexandros Frantzis (afrantzis)
Related merge proposals:
  https://code.launchpad.net/~linaro-graphics-wg/libmatrix/split-refactor/+merge/118059
  proposed by: Alexandros Frantzis (afrantzis)
  review: Approve - Jesse Barker (jesse-barker)
------------------------------------------------------------
revno: 39 [merge]
committer: Jesse Barker <jesse.barker@linaro.org>
branch nick: trunk
timestamp: Thu 2012-08-09 14:11:02 -0700
message:
  Merge of lp:~linaro-graphics-wg/libmatrix/split-refactor
  
  Adds bash-style escaping for Util::split and cleans up the API to handle all of
  the modes we now support.
added:
  test/util_split_test.cc
  test/util_split_test.h
modified:
  Makefile
  shader-source.cc
  test/libmatrix_test.cc
  util.cc
  util.h


--
lp:libmatrix
https://code.launchpad.net/~jesse-barker/libmatrix/trunk

You are subscribed to branch lp:libmatrix.
To unsubscribe from this branch go to https://code.launchpad.net/~jesse-barker/libmatrix/trunk/+edit-subscription

Patch

=== modified file 'Makefile'
--- Makefile	2012-05-02 21:36:04 +0000
+++ Makefile	2012-08-03 09:51:22 +0000
@@ -9,6 +9,7 @@ 
            $(TESTDIR)/inverse_test.cc \
            $(TESTDIR)/transpose_test.cc \
            $(TESTDIR)/shader_source_test.cc \
+           $(TESTDIR)/util_split_test.cc \
            $(TESTDIR)/libmatrix_test.cc
 TESTOBJS = $(TESTSRCS:.cc=.o)
 
@@ -32,6 +33,7 @@ 
 $(TESTDIR)/inverse_test.o: $(TESTDIR)/inverse_test.cc $(TESTDIR)/inverse_test.h $(TESTDIR)/libmatrix_test.h mat.h
 $(TESTDIR)/transpose_test.o: $(TESTDIR)/transpose_test.cc $(TESTDIR)/transpose_test.h $(TESTDIR)/libmatrix_test.h mat.h
 $(TESTDIR)/shader_source_test.o: $(TESTDIR)/shader_source_test.cc $(TESTDIR)/shader_source_test.h $(TESTDIR)/libmatrix_test.h shader-source.h
+$(TESTDIR)/util_split_test.o: $(TESTDIR)/util_split_test.cc $(TESTDIR)/util_split_test.h $(TESTDIR)/libmatrix_test.h util.h
 $(TESTDIR)/libmatrix_test: $(TESTOBJS) libmatrix.a
 	$(CXX) -o $@ $^
 run_tests: $(LIBMATRIX_TESTS)

=== modified file 'shader-source.cc'
--- shader-source.cc	2012-01-26 16:12:35 +0000
+++ shader-source.cc	2012-08-03 09:46:28 +0000
@@ -589,7 +589,7 @@ 
 {
     std::vector<std::string> elems;
 
-    Util::split(precision_values, ',', elems);
+    Util::split(precision_values, ',', elems, Util::SplitModeNormal);
 
     for (size_t i = 0; i < elems.size() && i < 4; i++) {
         const std::string& pstr(elems[i]);

=== modified file 'test/libmatrix_test.cc'
--- test/libmatrix_test.cc	2012-01-23 19:18:34 +0000
+++ test/libmatrix_test.cc	2012-08-06 15:04:36 +0000
@@ -8,6 +8,7 @@ 
 //
 // Contributors:
 //     Jesse Barker - original implementation.
+//     Alexandros Frantzis - Util::split tests
 //
 #include <iostream>
 #include <string>
@@ -17,6 +18,7 @@ 
 #include "transpose_test.h"
 #include "const_vec_test.h"
 #include "shader_source_test.h"
+#include "util_split_test.h"
 
 using std::cerr;
 using std::cout;
@@ -42,6 +44,8 @@ 
     testVec.push_back(new MatrixTest3x3Transpose());
     testVec.push_back(new MatrixTest4x4Transpose());
     testVec.push_back(new ShaderSourceBasic());
+    testVec.push_back(new UtilSplitTestNormal());
+    testVec.push_back(new UtilSplitTestQuoted());
 
     for (vector<MatrixTest*>::iterator testIt = testVec.begin();
          testIt != testVec.end();

=== added file 'test/util_split_test.cc'
--- test/util_split_test.cc	1970-01-01 00:00:00 +0000
+++ test/util_split_test.cc	2012-08-06 15:04:36 +0000
@@ -0,0 +1,180 @@ 
+//
+// Copyright (c) 2012 Linaro Limited
+//
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the MIT License which accompanies
+// this distribution, and is available at
+// http://www.opensource.org/licenses/mit-license.php
+//
+// Contributors:
+//     Alexandros Frantzis - original implementation.
+//
+#include <iostream>
+#include <string>
+#include <vector>
+#include "libmatrix_test.h"
+#include "util_split_test.h"
+#include "../util.h"
+
+using std::cout;
+using std::endl;
+using std::string;
+using std::vector;
+
+template <typename T> static bool
+areVectorsEqual(vector<T>& vec1, vector<T>& vec2)
+{
+    if (vec1.size() != vec2.size())
+        return false;
+
+    for (unsigned int i = 0; i < vec1.size(); i++)
+    {
+        if (vec1[i] != vec2[i])
+            return false;
+    }
+
+    return true;
+}
+
+template <typename T> static void
+printVector(vector<T>& vec)
+{
+    cout << "[";
+    for (unsigned int i = 0; i < vec.size(); i++)
+    {
+        cout << '"' << vec[i] << '"';
+        if (i < vec.size() - 1)
+            cout << ", ";
+    }
+    cout << "]";
+}
+
+void
+UtilSplitTestNormal::run(const Options& options)
+{
+    const string test1("abc def ghi");
+    const string test2(" abc: def :ghi ");
+    vector<string> expected1;
+    vector<string> expected2;
+    vector<string> results;
+
+    expected1.push_back("abc");
+    expected1.push_back("def");
+    expected1.push_back("ghi");
+
+    expected2.push_back(" abc");
+    expected2.push_back(" def ");
+    expected2.push_back("ghi ");
+
+    if (options.beVerbose())
+    {
+        cout << "Testing string \"" << test1 << "\"" << endl;
+    }
+
+    Util::split(test1, ' ', results, Util::SplitModeNormal);
+
+    if (options.beVerbose())
+    {
+        cout << "Split result: ";
+        printVector(results);
+        cout << endl << "Expected: ";
+        printVector(expected1);
+        cout << endl;
+    }
+
+    if (!areVectorsEqual(results, expected1))
+    {
+        return;
+    }
+
+    results.clear();
+
+    if (options.beVerbose())
+    {
+        cout << "Testing string \"" << test2 << "\"" << endl;
+    }
+
+    Util::split(test2, ':', results, Util::SplitModeNormal);
+
+    if (options.beVerbose())
+    {
+        cout << "Split result: ";
+        printVector(results);
+        cout << endl << "Expected: ";
+        printVector(expected2);
+        cout << endl;
+    }
+
+    if (!areVectorsEqual(results, expected2))
+    {
+        return;
+    }
+
+    pass_ = true;
+}
+
+void
+UtilSplitTestQuoted::run(const Options& options)
+{
+    const string test1("abc \"def' ghi\" klm\\ nop -b qr:title='123 \"456'");
+    const string test2("abc: def='1:2:3:'ghi : \":jk\"");
+    vector<string> expected1;
+    vector<string> expected2;
+    vector<string> results;
+
+    expected1.push_back("abc");
+    expected1.push_back("def' ghi");
+    expected1.push_back("klm nop");
+    expected1.push_back("-b");
+    expected1.push_back("qr:title=123 \"456");
+
+    expected2.push_back("abc");
+    expected2.push_back(" def=1:2:3:ghi ");
+    expected2.push_back(" :jk");
+
+    if (options.beVerbose())
+    {
+        cout << "Testing string \"" << test1 << "\"" << endl;
+    }
+
+    Util::split(test1, ' ', results, Util::SplitModeQuoted);
+
+    if (options.beVerbose())
+    {
+        cout << "Split result: ";
+        printVector(results);
+        cout << endl << "Expected: ";
+        printVector(expected1);
+        cout << endl;
+    }
+
+    if (!areVectorsEqual(results, expected1))
+    {
+        return;
+    }
+
+    results.clear();
+
+    if (options.beVerbose())
+    {
+        cout << "Testing string \"" << test2 << "\"" << endl;
+    }
+
+    Util::split(test2, ':', results, Util::SplitModeQuoted);
+
+    if (options.beVerbose())
+    {
+        cout << "Split result: ";
+        printVector(results);
+        cout << endl << "Expected: ";
+        printVector(expected2);
+        cout << endl;
+    }
+
+    if (!areVectorsEqual(results, expected2))
+    {
+        return;
+    }
+
+    pass_ = true;
+}

=== added file 'test/util_split_test.h'
--- test/util_split_test.h	1970-01-01 00:00:00 +0000
+++ test/util_split_test.h	2012-08-06 15:04:36 +0000
@@ -0,0 +1,31 @@ 
+//
+// Copyright (c) 2012 Linaro Limited
+//
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the MIT License which accompanies
+// this distribution, and is available at
+// http://www.opensource.org/licenses/mit-license.php
+//
+// Contributors:
+//     Alexandros Frantzis - original implementation.
+//
+#ifndef UTIL_SPLIT_TEST_H_
+#define UTIL_SPLIT_TEST_H_
+
+class MatrixTest;
+class Options;
+
+class UtilSplitTestNormal : public MatrixTest
+{
+public:
+    UtilSplitTestNormal() : MatrixTest("Util::split::normal") {}
+    virtual void run(const Options& options);
+};
+
+class UtilSplitTestQuoted : public MatrixTest
+{
+public:
+    UtilSplitTestQuoted() : MatrixTest("Util::split::quoted") {}
+    virtual void run(const Options& options);
+};
+#endif // UTIL_SPLIT_TEST_H_

=== modified file 'util.cc'
--- util.cc	2012-05-02 21:36:04 +0000
+++ util.cc	2012-08-03 10:08:36 +0000
@@ -25,25 +25,102 @@ 
 using std::string;
 using std::vector;
 
-void
-Util::split(const string& src, char delim, vector<string>& elementVec, bool fuzzy)
-{
-    // Trivial rejection
-    if (src.empty())
-    {
-        return;
-    }
-
-    // Simple case: we want to enforce the value of 'delim' strictly 
-    if (!fuzzy)
-    {
-        std::stringstream ss(src);
-        string item;
-        while(std::getline(ss, item, delim))
-            elementVec.push_back(item);
-        return;
-    }
-
+/*
+ * State machine for bash-like quoted string escaping:
+ *
+ *         \
+ *    -----------> +---------+
+ *    | ---------- | Escaped |
+ *    | |  *,ESC   +---------+
+ *    | |
+ *    | v      '
+ * +--------+ ---> +--------------+ -----
+ * | Normal | <--- | SingleQuoted |     | *, ESC
+ * +--------+  '   +--------------+ <----
+ *    | ^
+ *    | |
+ *    | |  "       +--------------+ ----
+ *    | ---------- | DoubleQuoted |    | *, ESC
+ *    -----------> +--------------+ <---
+ *         "             | ^
+ *                     \ | | *, ESC
+ *                       v |
+ *             +---------------------+
+ *             | DoubleQuotedEscaped |
+ *             +---------------------+
+ *
+ * ESC: Mark character as Escaped
+ */
+static void
+fill_escape_vector(const string &str, vector<bool> &esc_vec)
+{
+    enum State {
+        StateNormal,
+        StateEscaped,
+        StateDoubleQuoted,
+        StateDoubleQuotedEscaped,
+        StateSingleQuoted
+    };
+
+    State state = StateNormal;
+
+    for (string::const_iterator iter = str.begin();
+         iter != str.end();
+         iter++)
+    {
+        const char c(*iter);
+        bool esc = false;
+
+        switch (state) {
+            case StateNormal:
+                if (c == '"')
+                    state = StateDoubleQuoted;
+                else if (c == '\\')
+                    state = StateEscaped;
+                else if (c == '\'')
+                    state = StateSingleQuoted;
+                break;
+            case StateEscaped:
+                esc = true;
+                state = StateNormal;
+                break;
+            case StateDoubleQuoted:
+                if (c == '"')
+                    state = StateNormal;
+                else if (c == '\\')
+                    state = StateDoubleQuotedEscaped;
+                else
+                    esc = true;
+                break;
+            case StateDoubleQuotedEscaped:
+                esc = true;
+                state = StateDoubleQuoted;
+                break;
+            case StateSingleQuoted:
+                if (c == '\'')
+                    state = StateNormal;
+                else
+                    esc = true;
+            default:
+                break;
+        }
+
+        esc_vec.push_back(esc);
+    }
+}
+
+static void
+split_normal(const string& src, char delim, vector<string>& elementVec)
+{
+    std::stringstream ss(src);
+    string item;
+    while(std::getline(ss, item, delim))
+        elementVec.push_back(item);
+}
+
+static void
+split_fuzzy(const string& src, char delim, vector<string>& elementVec)
+{
     // Fuzzy case: Initialize our delimiter string based upon the caller's plus
     // a space to allow for more flexibility.
     string delimiter(" ");
@@ -76,6 +153,70 @@ 
     elementVec.push_back(str);
 }
 
+static void
+split_quoted(const string& src, char delim, vector<string>& elementVec)
+{
+    std::stringstream ss;
+    vector<bool> escVec;
+
+    /* Mark characters in the string as escaped or not */
+    fill_escape_vector(src, escVec);
+
+    /* Sanity check... */
+    if (src.length() != escVec.size())
+        return;
+
+    for (vector<bool>::const_iterator iter = escVec.begin();
+         iter != escVec.end();
+         iter++)
+    {
+        bool escaped = static_cast<bool>(*iter);
+        char c = src[iter - escVec.begin()];
+
+        /* Output all characters, except unescaped ",\,' */
+        if ((c != '"' && c != '\\' && c != '\'') || escaped) {
+            /* If we reach an unescaped delimiter character, do a split */
+            if (c == delim && !escaped) {
+                elementVec.push_back(ss.str());
+                ss.str("");
+                ss.clear();
+            }
+            else {
+                ss << c;
+            }
+        }
+
+    }
+
+    /* Handle final element, delimited by end of string */
+    const string &finalElement(ss.str());
+    if (!finalElement.empty())
+        elementVec.push_back(finalElement);
+}
+
+void
+Util::split(const string& src, char delim, vector<string>& elementVec,
+            Util::SplitMode mode)
+{
+    // Trivial rejection
+    if (src.empty())
+    {
+        return;
+    }
+
+    switch (mode)
+    {
+        case Util::SplitModeNormal:
+            return split_normal(src, delim, elementVec);
+        case Util::SplitModeFuzzy:
+            return split_fuzzy(src, delim, elementVec);
+        case Util::SplitModeQuoted:
+            return split_quoted(src, delim, elementVec);
+        default:
+            break;
+    }
+}
+
 uint64_t
 Util::get_timestamp_us()
 {

=== modified file 'util.h'
--- util.h	2012-05-02 21:36:04 +0000
+++ util.h	2012-08-03 10:08:36 +0000
@@ -25,21 +25,33 @@ 
 
 struct Util {
     /**
+     * How to perform the split() operation
+     */
+    enum SplitMode {
+        /** Normal split operation */
+        SplitModeNormal,
+        /** Allow for spaces and multiple consecutive occurrences of the delimiter */
+        SplitModeFuzzy,
+        /** Take into account bash-like quoting and escaping rules */
+        SplitModeQuoted
+    };
+
+    /**
      * split() - Splits a string into elements using a provided delimiter
      *
      * @s:          the string to split
      * @delim:      the delimiter to use
      * @elems:      the string vector to populate
-     * @fuzzy:      (optional) enable/disable strict handling of @delim
+     * @mode:       the SplitMode to use
      *
      * Using @delim to determine field boundaries, splits @s into separate
      * string elements.  These elements are returned in the string vector
-     * @elems.  If @fuzzy is true, then the handling of @delim allows for
-     * spaces and multiple consecutive occurences of @delim in determining
-     * field boundaries.  As long as @s is non-empty, there will be at least
-     * one element in @elems.
+     * @elems. In Normal and Fuzzy modes a non-empty @s yields at least one
+     * element; Quoted mode drops an empty final element and may yield none.
      */
-    static void split(const std::string &s, char delim, std::vector<std::string> &elems, bool fuzzy = false);
+    static void split(const std::string& src, char delim,
+                      std::vector<std::string>& elems,
+                      Util::SplitMode mode);
     /**
      * get_timestamp_us() - Returns the current time in microseconds
      */