diff --git a/src/js/config/Categories.js b/src/js/config/Categories.js
index 5120e5c9..09912a95 100755
--- a/src/js/config/Categories.js
+++ b/src/js/config/Categories.js
@@ -187,6 +187,7 @@ var Categories = [
"Extract dates",
"Regular expression",
"XPath expression",
+ "CSS selector",
]
},
{
diff --git a/src/js/config/OperationConfig.js b/src/js/config/OperationConfig.js
index dec017e0..98539c3a 100755
--- a/src/js/config/OperationConfig.js
+++ b/src/js/config/OperationConfig.js
@@ -1911,6 +1911,24 @@ var OperationConfig = {
}
]
},
+ "CSS selector": {
+ description: "Extract information from an HTML document with an CSS selector",
+ run: Extract.run_css_query,
+ input_type: "string",
+ output_type: "string",
+ args: [
+ {
+ name: "CSS selector",
+ type: "string",
+ value: Extract.SELECTOR_INITIAL
+ },
+ {
+ name: "Delimiter",
+ type: "binary_short_string",
+ value: Extract.CSS_QUERY_DELIMITER
+ },
+ ]
+ },
"From UNIX Timestamp": {
description: "Converts a UNIX timestamp to a datetime string.
e.g. 978346800
becomes Mon 1 January 2001 11:00:00 UTC
",
run: DateTime.run_from_unix_timestamp,
diff --git a/src/js/operations/Extract.js b/src/js/operations/Extract.js
index 6ee0b071..5582baee 100755
--- a/src/js/operations/Extract.js
+++ b/src/js/operations/Extract.js
@@ -314,8 +314,8 @@ var Extract = {
* @returns {string}
*/
run_xpath:function(input, args) {
- var query = args[0];
- var delimiter = args[1];
+ const query = args[0];
+ const delimiter = args[1];
try {
var xml = $.parseXML(input);
@@ -329,7 +329,7 @@ var Extract = {
return "Invalid XPath. Details:\n" + err.message;
}
- var serializer = new XMLSerializer();
+ const serializer = new XMLSerializer();
const nodeToString = function(node) {
const { nodeType, value, wholeText, data } = node;
switch (nodeType) {
@@ -344,5 +344,59 @@ var Extract = {
return Object.values(result).slice(0, -1) // all values except last (length)
.map(nodeToString)
.join(delimiter);
- }
+ },
+
+
+ /** Initial value of the "CSS selector" operation's selector argument (empty).
+ * @constant
+ * @default
+ */
+ SELECTOR_INITIAL: "",
+ /** Initial "Delimiter" argument: a backslash followed by "n", not a newline (presumably unescaped by the argument type - TODO confirm).
+ * @constant
+ * @default
+ */
+ CSS_QUERY_DELIMITER: "\\n",
+
+ /**
+ * Extract information (from an HTML document) with a CSS selector
+ *
+ * @param {string} input
+ * @param {Object[]} args
+ * @returns {string}
+ */
+ run_css_query: function(input, args) {
+ const query = args[0];
+ const delimiter = args[1];
+
+ try {
+ var html = $.parseHTML(input);
+ } catch (err) {
+ return "Invalid input HTML.";
+ }
+
+ try {
+ var result = $(html).find(query);
+ } catch (err) {
+ return "Invalid CSS Selector. Details:\n" + err.message;
+ }
+
+ const nodeToString = function(node) {
+ const { nodeType, value, wholeText, data } = node;
+ switch (nodeType) {
+ case Node.ELEMENT_NODE: return node.outerHTML;
+ case Node.ATTRIBUTE_NODE: return value;
+ case Node.COMMENT_NODE: return data;
+ case Node.TEXT_NODE: return wholeText;
+ case Node.DOCUMENT_NODE: return node.outerHTML;
+ default: throw new Error(`Unknown Node Type: ${nodeType}`);
+ }
+ }
+
+ return Array.apply(null, Array(result.length))
+ .map(function (_, i) {return result[i];})
+ .map(nodeToString)
+ .join(delimiter);
+ },
+
};