added css_query operation

This commit is contained in:
Schwörer Mike 2016-12-02 12:49:49 +01:00
parent d5f84abb3b
commit 2db6f8f63c
3 changed files with 77 additions and 4 deletions

View file

@ -187,6 +187,7 @@ var Categories = [
"Extract dates",
"Regular expression",
"XPath expression",
"CSS selector",
]
},
{

View file

@ -1911,6 +1911,24 @@ var OperationConfig = {
}
]
},
"CSS selector": {
description: "Extract information from an HTML document with an CSS selector",
run: Extract.run_css_query,
input_type: "string",
output_type: "string",
args: [
{
name: "CSS selector",
type: "string",
value: Extract.SELECTOR_INITIAL
},
{
name: "Delimiter",
type: "binary_short_string",
value: Extract.CSS_QUERY_DELIMITER
},
]
},
"From UNIX Timestamp": {
description: "Converts a UNIX timestamp to a datetime string.<br><br>e.g. <code>978346800</code> becomes <code>Mon 1 January 2001 11:00:00 UTC</code>",
run: DateTime.run_from_unix_timestamp,

View file

@ -314,8 +314,8 @@ var Extract = {
* @returns {string}
*/
run_xpath:function(input, args) {
var query = args[0];
var delimiter = args[1];
const query = args[0];
const delimiter = args[1];
try {
var xml = $.parseXML(input);
@ -329,7 +329,7 @@ var Extract = {
return "Invalid XPath. Details:\n" + err.message;
}
var serializer = new XMLSerializer();
const serializer = new XMLSerializer();
const nodeToString = function(node) {
const { nodeType, value, wholeText, data } = node;
switch (nodeType) {
@ -344,5 +344,59 @@ var Extract = {
return Object.values(result).slice(0, -1) // all values except last (length)
.map(nodeToString)
.join(delimiter);
}
},
/**
 * Initial value offered for the "CSS selector" argument of the CSS selector
 * operation. Empty, so no selector is pre-filled.
 *
 * @constant
 * @default
 */
SELECTOR_INITIAL: "",
/**
 * Initial value offered for the "Delimiter" argument of the CSS selector
 * operation. The literal is the two characters backslash and "n", not an
 * actual line break.
 * NOTE(review): presumably the "binary_short_string" argument type expands
 * the escape before it reaches run_css_query - confirm against the arg parser.
 *
 * @constant
 * @default
 */
CSS_QUERY_DELIMITER: "\\n",
/**
* Extract information (from an hmtl document) with an css selector
*
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_css_query: function(input, args) {
const query = args[0];
const delimiter = args[1];
try {
var html = $.parseHTML(input);
} catch (err) {
return "Invalid input HTML.";
}
try {
var result = $(html).find(query);
} catch (err) {
return "Invalid CSS Selector. Details:\n" + err.message;
}
const nodeToString = function(node) {
const { nodeType, value, wholeText, data } = node;
switch (nodeType) {
case Node.ELEMENT_NODE: return node.outerHTML;
case Node.ATTRIBUTE_NODE: return value;
case Node.COMMENT_NODE: return data;
case Node.TEXT_NODE: return wholeText;
case Node.DOCUMENT_NODE: return node.outerHTML;
default: throw new Error(`Unknown Node Type: ${nodeType}`);
}
}
return Array.apply(null, Array(result.length))
.map(function (_, i) {return result[i];})
.map(nodeToString)
.join(delimiter);
},
};