From ec6f5ebaf268106c5caea8cef7b494b8daf836ae Mon Sep 17 00:00:00 2001 From: whitequark Date: Fri, 10 Feb 2017 19:35:25 +0000 Subject: [PATCH] Automatically derive molar mass from formulas. This commit does not recognize in formulas any radioactive elements that rarely if ever appear as part of a compound, i.e. every one except Bi, Th, U, Pu. This is done to be able to recognize some common abbreviations in organic chemistry: Me, Et, Ac, Ph, one of which (Ac) clashes with the symbol of such an element (actinium). --- definitions.units | 110 +++++++++++++++++++++++++++++++++++++++++++++- src/ast.rs | 5 ++- src/context.rs | 2 + src/eval.rs | 10 ++++- src/formula.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++ src/gnu_units.rs | 27 +++++++++++- src/lib.rs | 1 + src/load.rs | 13 +++--- 8 files changed, 264 insertions(+), 11 deletions(-) create mode 100644 src/formula.rs diff --git a/definitions.units b/definitions.units index 013a1bd..8758e45 100644 --- a/definitions.units +++ b/definitions.units @@ -5789,7 +5789,7 @@ wc pressure_column of water mmH2O pressure of mm water inH2O pressure of inch water - +!symbol mercury Hg mercury { density mass 13.5951 gram / volume cm^3 pressure_column pressure 13.5951 gram force cm^-2 / column cm @@ -5816,6 +5816,8 @@ ammonia { specific_heat specific_energy 4.6 J g^-1 / temperature K } +NH3 ammonia + freon { ?? R-12 at 0 degrees Fahrenheit. 
specific_heat specific_energy 0.91 J g^-1 / temperature K @@ -6369,6 +6371,7 @@ actinium { molar_mass mass 227.0278 g / amount mol } +!symbol aluminum Al aluminum { molar_mass mass 26.981539 g / amount mol specific_heat specific_energy 0.91 J g^-1 / temperature K @@ -6379,16 +6382,19 @@ americium { molar_mass mass 243.0614 g / amount mol } +!symbol antimony Sb antimony { molar_mass mass 121.760 g / amount mol specific_heat specific_energy 0.21 J g^-1 / temperature K } +!symbol argon Ar argon { molar_mass mass 39.948 g / amount mol specific_heat specific_energy 0.5203 J g^-1 / temperature K } +!symbol arsenic As arsenic { molar_mass mass 74.92159 g / amount mol } @@ -6398,6 +6404,7 @@ astatine { molar_mass mass 209.9871 g / amount mol } +!symbol barium Ba barium { molar_mass mass 137.327 g / amount mol specific_heat specific_energy 0.20 J g^-1 / temperature K @@ -6408,11 +6415,13 @@ berkelium { molar_mass mass 247.0703 g / amount mol } +!symbol beryllium Be beryllium { molar_mass mass 9.012182 g / amount mol specific_heat specific_energy 1.83 J g^-1 / temperature K } +!symbol bismuth Bi bismuth { molar_mass mass 208.98037 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K @@ -6422,19 +6431,23 @@ bohrium { molar_mass mass 272.13826 g / amount mol } +!symbol boron B boron { molar_mass mass 10.811 g / amount mol } +!symbol bromine Br bromine { molar_mass mass 79.904 g / amount mol } +!symbol cadmium Cd cadmium { molar_mass mass 112.411 g / amount mol specific_heat specific_energy 0.23 J g^-1 / temperature K } +!symbol calcium Ca calcium { molar_mass mass 40.078 g / amount mol } @@ -6444,28 +6457,34 @@ californium { molar_mass mass 251.0796 g / amount mol } +!symbol carbon C carbon { molar_mass mass 12.011 g / amount mol } +!symbol cerium Ce cerium { molar_mass mass 140.115 g / amount mol } +!symbol cesium Cs cesium { molar_mass mass 132.90543 g / amount mol specific_heat specific_energy 0.24 J g^-1 / temperature K } +!symbol chlorine Cl chlorine { 
molar_mass mass 35.4527 g / amount mol } +!symbol chromium Cr chromium { molar_mass mass 51.9961 g / amount mol specific_heat specific_energy 0.46 J g^-1 / temperature K } +!symbol cobalt Co cobalt { molar_mass mass 58.93320 g / amount mol specific_heat specific_energy 0.42 J g^-1 / temperature K @@ -6475,6 +6494,7 @@ copernicium { molar_mass mass 285.17712 g / amount mol } +!symbol copper Cu copper { molar_mass mass 63.546 g / amount mol specific_heat specific_energy 0.39 J g^-1 / temperature K @@ -6488,6 +6508,7 @@ darmstadtium { molar_mass mass 281.16451 g / amount mol } +!symbol deuterium D deuterium { molar_mass mass 2.0141017778 g / amount mol } @@ -6496,6 +6517,7 @@ dubnium { molar_mass mass 268.12567 g / amount mol } +!symbol dysprosium Dy dysprosium { molar_mass mass 162.50 g / amount mol } @@ -6505,10 +6527,12 @@ einsteinium { molar_mass mass 252.083 g / amount mol } +!symbol erbium Er erbium { molar_mass mass 167.26 g / amount mol } +!symbol europium Eu europium { molar_mass mass 151.965 g / amount mol } @@ -6522,6 +6546,7 @@ flerovium { molar_mass mass 289.19042 g / amount mol } +!symbol fluorine F fluorine { molar_mass mass 18.9984032 g / amount mol } @@ -6531,25 +6556,30 @@ francium { molar_mass mass 223.0197 g / amount mol } +!symbol gadolinium Gd gadolinium { molar_mass mass 157.25 g / amount mol } +!symbol gallium Ga gallium { molar_mass mass 69.723 g / amount mol specific_heat specific_energy 0.37 J g^-1 / temperature K } +!symbol germanium Ge germanium { molar_mass mass 72.61 g / amount mol specific_heat specific_energy 0.32 J g^-1 / temperature K } +!symbol gold Au gold { molar_mass mass 196.96654 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol hafnium Hf hafnium { molar_mass mass 178.49 g / amount mol specific_heat specific_energy 0.14 J g^-1 / temperature K @@ -6559,44 +6589,53 @@ hassium { molar_mass mass 270.13429 g / amount mol } +!symbol helium He helium { molar_mass mass 4.002602 g / amount mol } 
+!symbol holmium Ho holmium { molar_mass mass 164.93032 g / amount mol specific_heat specific_energy 5.1932 J g^-1 / temperature K } +!symbol hydrogen H hydrogen { molar_mass mass 1.00794 g / amount mol specific_heat specific_energy 14.3 J g^-1 / temperature K } +!symbol indium In indium { molar_mass mass 114.818 g / amount mol specific_heat specific_energy 0.24 J g^-1 / temperature K } +!symbol iodine I iodine { molar_mass mass 126.90447 g / amount mol specific_heat specific_energy 2.15 J g^-1 / temperature K } +!symbol iridium Ir iridium { molar_mass mass 192.217 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol iron Fe iron { molar_mass mass 55.845 g / amount mol specific_heat specific_energy 0.45 J g^-1 / temperature K } +!symbol krypton Kr krypton { molar_mass mass 83.80 g / amount mol } +!symbol lanthanum La lanthanum { molar_mass mass 138.9055 g / amount mol specific_heat specific_energy 0.195 J g^-1 / temperature K @@ -6607,11 +6646,13 @@ lawrencium { molar_mass mass 262.11 g / amount mol } +!symbol lead Pb lead { molar_mass mass 207.2 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol lithium Li lithium { molar_mass mass 6.941 g / amount mol specific_heat specific_energy 3.57 J g^-1 / temperature K @@ -6621,16 +6662,19 @@ livermorium { molar_mass mass 293.20449 g / amount mol } +!symbol lutetium Lu lutetium { molar_mass mass 174.967 g / amount mol specific_heat specific_energy 0.15 J g^-1 / temperature K } +!symbol magnesium Mg magnesium { molar_mass mass 24.3050 g / amount mol specific_heat specific_energy 1.05 J g^-1 / temperature K } +!symbol manganese Mn manganese { molar_mass mass 54.93805 g / amount mol specific_heat specific_energy 0.48 J g^-1 / temperature K @@ -6645,15 +6689,18 @@ mendelevium { molar_mass mass 258.10 g / amount mol } +!symbol molybdenum Mo molybdenum { molar_mass mass 95.94 g / amount mol specific_heat specific_energy 0.25 J g^-1 / temperature K } +!symbol 
neodymium Nd neodymium { molar_mass mass 144.24 g / amount mol } +!symbol neon Ne neon { molar_mass mass 20.1797 g / amount mol } @@ -6662,15 +6709,18 @@ neptunium { molar_mass mass 237.0482 g / amount mol } +!symbol nickel Ni nickel { molar_mass mass 58.6934 g / amount mol specific_heat specific_energy 0.44 J g^-1 / temperature K } +!symbol niobium Nb niobium { molar_mass mass 92.90638 g / amount mol } +!symbol nitrogen N nitrogen { molar_mass mass 14.00674 g / amount mol } @@ -6680,29 +6730,35 @@ nobelium { molar_mass mass 259.1009 g / amount mol } +!symbol osmium Os osmium { molar_mass mass 190.23 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol oxygen O oxygen { molar_mass mass 15.9994 g / amount mol } +!symbol palladium Pd palladium { molar_mass mass 106.42 g / amount mol specific_heat specific_energy 0.24 J g^-1 / temperature K } +!symbol phosphorus P phosphorus { molar_mass mass 30.973762 g / amount mol } +!symbol platinum Pt platinum { molar_mass mass 195.08 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol plutonium Pu plutonium { ?? Longest lived. 
239.05 molar_mass mass 244.0642 g / amount mol @@ -6714,11 +6770,13 @@ polonium { molar_mass mass 208.9824 g / amount mol } +!symbol potassium K potassium { molar_mass mass 39.0983 g / amount mol specific_heat specific_energy 0.75 J g^-1 / temperature K } +!symbol praseodymium Pr praseodymium { molar_mass mass 140.90765 g / amount mol } @@ -6741,11 +6799,13 @@ radon { molar_mass mass 222.0176 g / amount mol } +!symbol rhenium Re rhenium { molar_mass mass 186.207 g / amount mol specific_heat specific_energy 0.14 J g^-1 / temperature K } +!symbol rhodium Rh rhodium { molar_mass mass 102.90550 g / amount mol specific_heat specific_energy 0.24 J g^-1 / temperature K @@ -6755,11 +6815,13 @@ roentgenium { molar_mass mass 280.16514 g / amount mol } +!symbol rubidium Rb rubidium { molar_mass mass 85.4678 g / amount mol specific_heat specific_energy 0.36 J g^-1 / temperature K } +!symbol ruthenium Ru ruthenium { molar_mass mass 101.07 g / amount mol specific_heat specific_energy 0.24 J g^-1 / temperature K @@ -6769,10 +6831,12 @@ rutherfordium { molar_mass mass 267.12179 g / amount mol } +!symbol samarium Sm samarium { molar_mass mass 150.36 g / amount mol } +!symbol scandium Sc scandium { molar_mass mass 44.955910 g / amount mol specific_heat specific_energy 0.57 J g^-1 / temperature K @@ -6782,35 +6846,42 @@ seaborgium { molar_mass mass 271.13393 g / amount mol } +!symbol selenium Se selenium { molar_mass mass 78.96 g / amount mol specific_heat specific_energy 0.32 J g^-1 / temperature K } +!symbol silicon Si silicon { molar_mass mass 28.0855 g / amount mol specific_heat specific_energy 0.71 J g^-1 / temperature K } +!symbol silver Ag silver { molar_mass mass 107.8682 g / amount mol specific_heat specific_energy 0.23 J g^-1 / temperature K } +!symbol sodium Na sodium { molar_mass mass 22.989768 g / amount mol specific_heat specific_energy 1.21 J g^-1 / temperature K } +!symbol strontium Sr strontium { molar_mass mass 87.62 g / amount mol specific_heat specific_energy 0.30 
J g^-1 / temperature K } +!symbol sulfur S sulfur { molar_mass mass 32.066 g / amount mol } +!symbol tantalum Ta tantalum { molar_mass mass 180.9479 g / amount mol specific_heat specific_energy 0.14 J g^-1 / temperature K @@ -6821,38 +6892,46 @@ technetium { molar_mass mass 97.9072 g / amount mol } +!symbol tellurium Te tellurium { molar_mass mass 127.60 g / amount mol } +!symbol terbium Tb terbium { molar_mass mass 158.92534 g / amount mol } +!symbol thallium Tl thallium { molar_mass mass 204.3833 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol thorium Th thorium { molar_mass mass 232.0381 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K } +!symbol thullium Tm thullium { molar_mass mass 168.93421 g / amount mol } +!symbol tin Sn tin { molar_mass mass 118.710 g / amount mol specific_heat specific_energy 0.21 J g^-1 / temperature K } +!symbol titanium Ti titanium { molar_mass mass 47.867 g / amount mol specific_heat specific_energy 0.54 J g^-1 / temperature K } +!symbol tungsten W tungsten { molar_mass mass 183.84 g / amount mol specific_heat specific_energy 0.13 J g^-1 / temperature K @@ -6874,6 +6953,7 @@ ununtrium { molar_mass mass 284.17873 g / amount mol } +!symbol uranium U uranium { molar_mass mass 238.0289 g / amount mol specific_heat specific_energy 0.12 J g^-1 / temperature K @@ -6888,29 +6968,35 @@ uranium { specific_energy_235_fission fission_energy 200 MeV / mass ((235.0439299 g/mol) / avogadro) } +!symbol vanadium V vanadium { molar_mass mass 50.9415 g / amount mol specific_heat specific_energy 0.39 J g^-1 / temperature K } +!symbol xenon Xe xenon { molar_mass mass 131.29 g / amount mol } +!symbol ytterbium Yb ytterbium { molar_mass mass 173.04 g / amount mol } +!symbol yttrium Y yttrium { molar_mass mass 88.90585 g / amount mol specific_heat specific_energy 0.30 J g^-1 / temperature K } +!symbol zinc Zn zinc { molar_mass mass 65.39 g / amount mol specific_heat specific_energy 0.39 J g^-1 / 
temperature K } +!symbol zirconium Zr zirconium { molar_mass mass 91.224 g / amount mol specific_heat specific_energy 0.27 J g^-1 / temperature K @@ -6933,6 +7019,28 @@ air 78.08 % nitrogen 2 \ + 1.14 ppm krypton \ + 0.55 ppm hydrogen 2 +# Various abbreviations used in organic chemistry. + +!symbol methyl Me +methyl { + molar_mass mass 15.03482 g / amount mol +} + +!symbol ethyl Et +ethyl { + molar_mass mass 29.0617 g / amount mol +} + +!symbol acetyl Ac +acetyl { + molar_mass mass 43.04522 g / amount mol +} + +!symbol phenyl Ph +phenyl { + molar_mass mass 77.1057 g / amount mol +} + !endcategory ############################################################################ diff --git a/src/ast.rs b/src/ast.rs index 0ab9af7..b23b0d2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -108,7 +108,10 @@ pub enum Def { SPrefix(Expr), Unit(Expr), Quantity(Expr), - Substance(Vec), + Substance { + symbol: Option, + properties: Vec + }, Category(String), Error(String), } diff --git a/src/context.rs b/src/context.rs index b7792b7..8b04ca8 100644 --- a/src/context.rs +++ b/src/context.rs @@ -25,6 +25,7 @@ pub struct Context { pub category_names: BTreeMap, pub datepatterns: Vec>, pub substances: BTreeMap, + pub substance_symbols: BTreeMap, pub temporaries: BTreeMap, pub short_output: bool, pub use_humanize: bool, @@ -46,6 +47,7 @@ impl Context { category_names: BTreeMap::new(), datepatterns: Vec::new(), substances: BTreeMap::new(), + substance_symbols: BTreeMap::new(), temporaries: BTreeMap::new(), short_output: false, use_humanize: true, diff --git a/src/eval.rs b/src/eval.rs index 6769224..3dd77ce 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -19,6 +19,7 @@ use reply::{ use search; use context::Context; use substance::SubstanceGetError; +use formula::substance_from_formula; impl Context { /// Evaluates an expression to compute its value, *excluding* `->` @@ -69,8 +70,13 @@ impl Context { Ok(Value::DateTime(date::GenericDateTime::Fixed(date::now()))), Expr::Unit(ref name) => 
self.lookup(name).map(Value::Number) - .or_else(|| self.substances.get(name) - .cloned().map(Value::Substance) + .or_else(|| + self.substances.get(name) + .cloned().map(Value::Substance) + ) + .or_else(|| + substance_from_formula(name, &self.substance_symbols, &self.substances) + .map(Value::Substance) ) .ok_or_else(|| QueryError::NotFound( self.unknown_unit_err(name) diff --git a/src/formula.rs b/src/formula.rs new file mode 100644 index 0000000..5b9233e --- /dev/null +++ b/src/formula.rs @@ -0,0 +1,107 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::str::{Chars, FromStr}; +use std::iter::Peekable; +use std::collections::BTreeMap; +use std::rc::Rc; +use num::Num; +use number::{Number, Dim}; +use substance::{Property, Properties, Substance}; + +enum Token { + Symbol(String), + Count(u32), + Error +} + +#[derive(Clone)] +struct TokenIterator<'a>(Peekable>); + +impl<'a> TokenIterator<'a> { + pub fn new(input: &'a str) -> TokenIterator<'a> { + TokenIterator(input.chars().peekable()) + } +} + +impl<'a> Iterator for TokenIterator<'a> { + type Item = Token; + + fn next(&mut self) -> Option { + if self.0.peek() == None { + return None + } + let res = match self.0.next().unwrap() { + letter @ 'A'...'Z' => { + let mut symbol = String::new(); + symbol.push(letter); + match self.0.peek().cloned() { + Some('a'...'z') => symbol.push(self.0.next().unwrap()), + _ => () + } + Token::Symbol(symbol) + } + digit @ '0'...'9' => { + let mut integer = String::new(); + integer.push(digit); + while let Some('0'...'9') = self.0.peek().cloned() { + integer.push(self.0.next().unwrap()) + } + Token::Count(u32::from_str(&integer).unwrap()) + } + _ => Token::Error + }; + Some(res) + } +} + +pub fn substance_from_formula(formula: &str, + symbols: &BTreeMap, + substances: &BTreeMap) -> Option { + let mut molar_mass_unit = 
BTreeMap::new(); + molar_mass_unit.insert(Dim::new("kg"), 1); + molar_mass_unit.insert(Dim::new("mol"), -1); + let mut total_molar_mass = Number { value: Num::from(0), unit: molar_mass_unit }; + + let mut iter = TokenIterator::new(formula).peekable(); + while let Some(token) = iter.next() { + match token { + Token::Symbol(ref sym) if symbols.contains_key(sym) => { + let count = match iter.peek() { + Some(&Token::Count(n)) => { + iter.next().unwrap(); + Number::new(Num::from(n as i64)) + } + _ => Number::one() + }; + + let subst = substances.get(symbols.get(sym).unwrap()).unwrap(); + match subst.get("molar_mass") { + Ok(subst_molar_mass) => { + let subst_molar_mass = (&subst_molar_mass * &count).unwrap(); + total_molar_mass = (&total_molar_mass + &subst_molar_mass).unwrap(); + } + Err(_) => return None + } + } + _ => return None + } + } + + let mut props = BTreeMap::new(); + props.insert("molar_mass".to_owned(), Property { + output: total_molar_mass, + output_name: "mass".to_owned(), + input: Number::one(), + input_name: "amount".to_owned(), + doc: None, + }); + Some(Substance { + amount: Number::one(), + properties: Rc::new(Properties { + name: formula.to_owned(), + properties: props, + }) + }) +} diff --git a/src/gnu_units.rs b/src/gnu_units.rs index 8b0aeb9..64b80f7 100644 --- a/src/gnu_units.rs +++ b/src/gnu_units.rs @@ -5,6 +5,7 @@ use std::str::Chars; use std::iter::Peekable; use std::rc::Rc; +use std::collections::BTreeMap; use ast::*; use num::Num; @@ -295,6 +296,7 @@ pub fn parse(mut iter: &mut Iter) -> Defs { let mut line = 1; let mut doc = None; let mut category = None; + let mut symbols = BTreeMap::new(); loop { match iter.next().unwrap() { Token::Newline => line += 1, @@ -321,6 +323,14 @@ pub fn parse(mut iter: &mut Iter) -> Defs { } category = None }, + Token::Ident(ref s) if s == "symbol" => { + match (iter.next().unwrap(), iter.next().unwrap()) { + (Token::Ident(subst), Token::Ident(sym)) => { + symbols.insert(subst, sym); + } + _ => 
println!("Malformed symbol directive"), + } + } _ => loop { match iter.peek().cloned().unwrap() { Token::Newline | Token::Eof => break, @@ -478,7 +488,10 @@ pub fn parse(mut iter: &mut Iter) -> Defs { } map.push(DefEntry { name: name, - def: Rc::new(Def::Substance(props)), + def: Rc::new(Def::Substance { + symbol: None, + properties: props + }), doc: doc.take(), category: category.clone(), }); @@ -497,8 +510,18 @@ pub fn parse(mut iter: &mut Iter) -> Defs { x => println!("Expected definition on line {}, got {:?}", line, x), }; } + + for entry in map.iter_mut() { + match Rc::get_mut(&mut entry.def).unwrap() { + &mut Def::Substance { ref mut symbol, .. } => { + *symbol = symbols.get(&entry.name).map(|x| x.to_owned()) + } + _ => () + } + } + Defs { - defs: map, + defs: map } } diff --git a/src/lib.rs b/src/lib.rs index 4363db3..5adb47c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,6 +68,7 @@ pub mod reply; pub mod search; pub mod load; pub mod substance; +pub mod formula; #[cfg(feature = "currency")] pub mod currency; #[cfg(feature = "currency")] diff --git a/src/load.rs b/src/load.rs index 6c3ea03..aea5659 100644 --- a/src/load.rs +++ b/src/load.rs @@ -144,8 +144,8 @@ impl Context { Def::Canonicalization(ref e) => { self.lookup(&Rc::new(e.clone())); }, - Def::Substance(ref props) => { - for prop in props { + Def::Substance { ref properties, .. 
} => { + for prop in properties { self.eval(&prop.input); self.eval(&prop.output); } @@ -296,9 +296,9 @@ impl Context { Ok(_) => println!("Quantity {} is not a number", name), Err(e) => println!("Quantity {} is malformed: {}", name, e) }, - Def::Substance(ref props) => { + Def::Substance { ref properties, ref symbol } => { let mut prev = BTreeMap::new(); - let res = props.iter().map(|prop| { + let res = properties.iter().map(|prop| { let input = match self.eval(&prop.input) { Ok(Value::Number(v)) => v, Ok(x) => return Err(format!( @@ -364,10 +364,13 @@ impl Context { self.substances.insert(name.clone(), Substance { amount: Number::one(), properties: Rc::new(Properties { - name: name, + name: name.clone(), properties: res, }), }); + if let &Some(ref symbol) = symbol { + self.substance_symbols.insert(symbol.clone(), name.clone()); + } }, Err(e) => println!("Substance {} is malformed: {}", name, e), }