// coreutils/src/expr/syntax_tree.rs

/*
* This file is part of the uutils coreutils package.
*
* (c) Roman Gafiyatullin <r.gafiyatullin@me.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
//!
//! Here we employ the shunting-yard algorithm to build an AST from tokens, according to operator precedence and associativity.
//! * https://en.wikipedia.org/wiki/Shunting-yard_algorithm
//!
use tokens::Token;
type TokenStack = Vec<(usize, Token)>;
pub type OperandsList = Vec< Box<ASTNode> >;
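/// A node of the expression AST: a `Leaf` holds a literal value, a `Node`
/// holds an operator and its operands; `token_idx` remembers which input
/// token the node came from.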
#[derive(Debug)]
pub enum ASTNode {
Leaf { token_idx: usize, value: String },
Node { token_idx: usize, op_type: String, operands: OperandsList }
}
impl ASTNode {
fn debug_dump( &self ) {
self.debug_dump_impl( 1 );
}
fn debug_dump_impl( &self, depth: usize ) {
for _ in 0..depth {
print!("\t", );
}
match *self {
ASTNode::Leaf{ ref token_idx, ref value } => println!("Leaf( {} ) at #{} ( evaluate -> {:?} )", value, token_idx, self.evaluate()),
ASTNode::Node{ ref token_idx, ref op_type, ref operands } => {
println!("Node( {} ) at #{} (evaluate -> {:?})", op_type, token_idx, self.evaluate());
for operand in operands {
operand.debug_dump_impl( depth + 1 );
}
}
}
}
fn new_node( token_idx: usize, op_type: &String, operands: OperandsList ) -> Box<ASTNode> {
Box::new( ASTNode::Node{
token_idx: token_idx,
op_type: op_type.clone(),
operands: operands
} )
}
fn new_leaf( token_idx: usize, value: &String ) -> Box<ASTNode> {
Box::new( ASTNode::Leaf{ token_idx: token_idx, value: value.clone() } )
}
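/// Evaluate the subtree rooted at this node, producing a string result.
/// A leaf yields its literal value; an operator node evaluates its operands
/// and then applies the operation named by `op_type`.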
pub fn evaluate( &self ) -> Result<String, String> {
match *self {
ASTNode::Leaf{ ref value, .. } => Ok( value.clone() ),
ASTNode::Node{ ref op_type, .. } =>
match self.operand_values() {
Err( reason ) => Err( reason ),
Ok( operand_values ) =>
match op_type.as_ref() {
"+" => infix_operator_two_ints( |a: i64, b: i64| Ok( a + b ), &operand_values ),
"-" => infix_operator_two_ints( |a: i64, b: i64| Ok( a - b ), &operand_values ),
"*" => infix_operator_two_ints( |a: i64, b: i64| Ok( a * b ), &operand_values ),
"/" => infix_operator_two_ints(
|a: i64, b: i64|
if b == 0 { Err("division by zero".to_owned()) }
else { Ok( a / b ) },
&operand_values ),
"%" => infix_operator_two_ints(
|a: i64, b: i64|
if b == 0 { Err("division by zero".to_owned()) }
else { Ok( a % b ) },
&operand_values ),
"=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a == b) ),
|a: &String, b: &String| Ok( bool_as_string(a == b) ),
&operand_values
),
"!=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a != b) ),
|a: &String, b: &String| Ok( bool_as_string(a != b) ),
&operand_values
),
"<" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a < b) ),
|a: &String, b: &String| Ok( bool_as_string(a < b) ),
&operand_values
),
">" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a > b) ),
|a: &String, b: &String| Ok( bool_as_string(a > b) ),
&operand_values
),
"<=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a <= b) ),
|a: &String, b: &String| Ok( bool_as_string(a <= b) ),
&operand_values
),
">=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a >= b) ),
|a: &String, b: &String| Ok( bool_as_string(a >= b) ),
&operand_values
),
"|" => infix_operator_or(&operand_values),
"&" => infix_operator_and(&operand_values),
"length" => prefix_operator_length( &operand_values ),
"index" => prefix_operator_index( &operand_values ),
"substr" => prefix_operator_substr( &operand_values ),
_ => Err(format!("operation not implemented: {}", op_type))
}
}
}
}
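/// Evaluate all operands of an operator node, in order, returning the first
/// error encountered. Panics if called on a leaf.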
pub fn operand_values( &self ) -> Result<Vec<String>, String> {
if let &ASTNode::Node{ ref operands, .. } = self {
let mut out = Vec::with_capacity( operands.len() );
for operand in operands {
match operand.evaluate() {
Ok( value ) => out.push( value ),
Err( reason ) => return Err( reason ),
}
}
Ok( out )
}
else { panic!("Invoked .operand_values(&self) not with ASTNode::Node") }
}
}
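/// Parse the token stream into an AST: shunting-yard converts the tokens into
/// RPN on `out_stack`, which is then folded into a tree; leftover tokens mean
/// the input was not a single well-formed expression.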
pub fn tokens_to_ast( maybe_tokens: Result< Vec<(usize, Token)>, String > ) -> Result<Box<ASTNode>, String> {
if maybe_tokens.is_err() { Err( maybe_tokens.err().unwrap() ) }
else {
let tokens = maybe_tokens.ok().unwrap();
let mut out_stack: TokenStack = Vec::new();
let mut op_stack: TokenStack = Vec::new();
for (token_idx, token) in tokens {
if let Err( reason ) = push_token_to_either_stack( token_idx, &token, &mut out_stack, &mut op_stack ) {
return Err( reason )
}
}
if let Err( reason ) = move_rest_of_ops_to_out( &mut out_stack, &mut op_stack ) {
return Err( reason )
}
assert!( op_stack.is_empty() );
maybe_dump_rpn( &out_stack );
let result = ast_from_rpn( &mut out_stack );
if !out_stack.is_empty() {
Err( "syntax error (fist RPN token does not represent expression AST's root)".to_owned() )
}
else {
maybe_dump_ast( &result );
result
}
}
}
fn maybe_dump_ast( result: &Result< Box<ASTNode>, String > ) {
use std::env;
if let Ok( debug_var ) = env::var( "EXPR_DEBUG_AST" ) {
if debug_var == "1" {
println!("EXPR_DEBUG_AST");
match *result {
Ok( ref ast ) => ast.debug_dump(),
Err( ref reason ) => println!("\terr: {:?}", reason),
}
}
}
}
fn maybe_dump_rpn( rpn: &TokenStack ) {
use std::env;
if let Ok( debug_var ) = env::var( "EXPR_DEBUG_RPN" ) {
if debug_var == "1" {
println!("EXPR_DEBUG_RPN");
for token in rpn {
println!("\t{:?}", token);
}
}
}
}
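/// Pop the top of the RPN stack and build the corresponding subtree,
/// recursing to collect the operands of operator tokens.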
fn ast_from_rpn( rpn: &mut TokenStack ) -> Result<Box<ASTNode>, String> {
match rpn.pop() {
None => Err( "syntax error (premature end of expression)".to_owned() ),
Some( (token_idx, Token::Value{ value }) ) =>
Ok( ASTNode::new_leaf( token_idx, &value ) ),
Some( (token_idx, Token::InfixOp{ value, .. }) ) =>
maybe_ast_node( token_idx, &value, 2, rpn ),
Some( (token_idx, Token::PrefixOp{ value, arity }) ) =>
maybe_ast_node( token_idx, &value, arity, rpn ),
Some( (token_idx, unexpected_token) ) =>
panic!("unexpected token at #{} {:?}", token_idx, unexpected_token),
}
}
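/// Build an operator node by popping `arity` operand subtrees from the RPN stack.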
fn maybe_ast_node( token_idx: usize, op_type: &String, arity: usize, rpn: &mut TokenStack ) -> Result< Box<ASTNode>, String > {
let mut operands = Vec::with_capacity( arity );
for _ in 0..arity {
match ast_from_rpn( rpn ) {
Err( reason ) => return Err( reason ),
Ok( operand ) => operands.push( operand ),
}
}
operands.reverse();
Ok( ASTNode::new_node( token_idx, op_type, operands ) )
}
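/// After all input tokens are consumed, drain the operator stack into the
/// output stack; any parenthesis still on it indicates a mismatch.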
fn move_rest_of_ops_to_out( out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
loop {
match op_stack.pop() {
None => return Ok( () ),
Some( (token_idx, Token::ParOpen) ) => return Err( format!( "syntax error (Mismatched open-parenthesis at #{})", token_idx ) ),
Some( (token_idx, Token::ParClose) ) => return Err( format!( "syntax error (Mismatched close-parenthesis at #{})", token_idx ) ),
Some( other ) => out_stack.push( other )
}
}
}
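/// One shunting-yard step: values go straight to the output stack, operators
/// and opening parentheses go to the operator stack (respecting precedence for
/// infix operators), and a closing parenthesis unwinds back to the matching `(`.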
fn push_token_to_either_stack( token_idx: usize, token: &Token, out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
let result =
match *token {
Token::Value{ .. } => Ok( out_stack.push( (token_idx, token.clone()) ) ),
Token::InfixOp{ .. } =>
if op_stack.is_empty() { Ok( op_stack.push( (token_idx, token.clone()) ) ) }
else { push_op_to_stack( token_idx, token, out_stack, op_stack ) },
Token::PrefixOp{ .. } => Ok( op_stack.push( (token_idx, token.clone()) ) ),
Token::ParOpen => Ok( op_stack.push( (token_idx, token.clone()) ) ),
Token::ParClose => move_till_match_paren( out_stack, op_stack )
};
maybe_dump_shunting_yard_step( token_idx, token, out_stack, op_stack, &result );
result
}
fn maybe_dump_shunting_yard_step( token_idx: usize, token: &Token, out_stack: &TokenStack, op_stack: &TokenStack, result: &Result<(), String> ) {
use std::env;
if let Ok( debug_var ) = env::var( "EXPR_DEBUG_SYA_STEP" ) {
if debug_var == "1" {
println!("EXPR_DEBUG_SYA_STEP");
println!("\t{} => {:?}", token_idx, token);
println!("\t\tout: {:?}", out_stack);
println!("\t\top : {:?}", op_stack);
println!("\t\tresult: {:?}", result);
}
}
}
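/// Push an infix operator onto the operator stack, first moving to the output
/// stack any infix operators already there that bind at least as tightly
/// (or strictly tighter, for a right-associative operator).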
fn push_op_to_stack( token_idx: usize, token: &Token, out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
if let &Token::InfixOp{ precedence: prec, left_assoc: la, .. } = token {
loop {
match op_stack.last() {
None =>
return Ok( op_stack.push( (token_idx, token.clone()) ) ),
Some( &(_, Token::ParOpen) ) =>
return Ok( op_stack.push( (token_idx, token.clone()) ) ),
Some( &(_, Token::InfixOp{ precedence: prev_prec, .. }) ) =>
if la && prev_prec >= prec
|| !la && prev_prec > prec {
out_stack.push( op_stack.pop().unwrap() )
}
else {
return Ok( op_stack.push( (token_idx, token.clone()) ) )
},
Some( &(_, Token::PrefixOp{ .. }) ) =>
return Ok( op_stack.push( (token_idx, token.clone()) ) ),
Some( _ ) => panic!("Non-operator on op_stack")
}
}
}
else {
panic!("Expected infix-op")
}
}
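/// On a closing parenthesis, move operators to the output stack until the
/// matching opening parenthesis is found and dropped.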
fn move_till_match_paren( out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
loop {
match op_stack.pop() {
None => return Err( "syntax error (Mismatched close-parenthesis)".to_string() ),
Some( (_, Token::ParOpen) ) => return Ok( () ),
Some( other ) => out_stack.push( other )
}
}
}
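/// Apply the integer operation `f`; fails unless both operands parse as integers.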
fn infix_operator_two_ints<F>( f: F, values: &Vec<String> ) -> Result<String, String>
where F : Fn( i64, i64 ) -> Result<i64, String>
{
assert!( values.len() == 2 );
if let Ok( left ) = values[0].parse::<i64>() {
if let Ok( right ) = values[1].parse::<i64>() {
return match f( left, right ) {
Ok(result) => Ok(result.to_string()),
Err(reason) => Err(reason),
}
}
}
Err( "Expected an integer operand".to_string() )
}
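/// Apply `fi` if both operands parse as integers, otherwise apply `fs`
/// to the operands as strings (used for the comparison operators).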
fn infix_operator_two_ints_or_two_strings<FI, FS>( fi: FI, fs: FS, values: &Vec<String> ) -> Result<String, String>
where FI : Fn( i64, i64 ) -> Result<i64, String>,
FS : Fn( &String, &String ) -> Result<String, String>
{
assert!( values.len() == 2 );
if let ( Some( a_int ), Some( b_int ) ) =
(
values[0].parse::<i64>().ok(),
values[1].parse::<i64>().ok()
) {
match fi( a_int, b_int ) {
Ok( result ) => Ok(result.to_string()),
Err( reason ) => Err(reason)
}
}
else {
fs( &values[0], &values[1] )
}
}
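/// Evaluate `|`: return the first operand if it is truthy
/// (non-empty and not numerically zero), otherwise the second.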
fn infix_operator_or( values: &Vec<String> ) -> Result<String, String> {
assert!(values.len() == 2);
if value_as_bool(&values[0]) {
Ok(values[0].clone())
} else {
Ok(values[1].clone())
}
}
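/// Evaluate `&`: return the first operand if both operands are truthy,
/// otherwise "0".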
fn infix_operator_and( values: &Vec<String> ) -> Result<String, String> {
assert!( values.len() == 2 );
if value_as_bool(&values[0]) && value_as_bool(&values[1]) {
Ok(values[0].clone())
} else {
Ok(0.to_string())
}
}
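/// Evaluate `length STRING`: the length of the operand, in bytes.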
fn prefix_operator_length( values: &Vec<String> ) -> Result<String, String> {
assert!( values.len() == 1 );
Ok( values[0].len().to_string() )
}
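/// Evaluate `index STRING CHARS`: the 1-based position in STRING of the
/// first character that also occurs in CHARS, or 0 if there is none.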
fn prefix_operator_index( values: &Vec<String> ) -> Result<String, String> {
assert!( values.len() == 2 );
let haystack = &values[0];
let needles = &values[1];
let mut current_idx = 0;
for ch_h in haystack.chars() {
current_idx += 1;
for ch_n in needles.chars() {
if ch_n == ch_h {
return Ok( current_idx.to_string() )
}
}
}
Ok( "0".to_string() )
}
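/// Evaluate `substr STRING POS LENGTH`: the substring of STRING starting at
/// 1-based POS and spanning LENGTH characters; empty if POS or LENGTH is not positive.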
fn prefix_operator_substr( values: &Vec<String> ) -> Result<String, String> {
assert!( values.len() == 3 );
let subj = &values[0];
let mut idx = match values[1].parse::<i64>() {
Ok( i ) => i,
Err( _ ) => return Err( "expected integer as POS arg to 'substr'".to_string() ),
};
let mut len = match values[2].parse::<i64>() {
Ok( i ) => i,
Err( _ ) => return Err( "expected integer as LENGTH arg to 'substr'".to_string() ),
};
if idx <= 0 || len <= 0 { return Ok( "".to_string() ) }
let mut out_str = String::new();
for ch in subj.chars() {
idx -= 1;
if idx <= 0 {
if len <= 0 { break; }
len -= 1;
out_str.push( ch );
}
}
Ok( out_str )
}
fn bool_as_int( b: bool ) -> i64 { if b { 1 } else { 0 } }
fn bool_as_string( b: bool ) -> String { if b { "1".to_string() } else { "0".to_string() } }
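/// Truthiness as used by `&` and `|`: the empty string and strings that
/// parse as the integer 0 are false; everything else is true.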
fn value_as_bool( s: &str ) -> bool {
if s.is_empty() {
return false
}
match s.parse::<i64>() {
Ok(n) => n != 0,
Err(_) => true,
}
}