From 784382eddee7e454d1a417a1223882127ae70c84 Mon Sep 17 00:00:00 2001 From: Michael Angerman <1809991+stormasm@users.noreply.github.com> Date: Thu, 24 Feb 2022 03:58:20 -0800 Subject: [PATCH] 30 web_tables tests are now passing (#4623) --- crates/nu_plugin_query/src/web_tables.rs | 1782 +++++++++++----------- 1 file changed, 888 insertions(+), 894 deletions(-) diff --git a/crates/nu_plugin_query/src/web_tables.rs b/crates/nu_plugin_query/src/web_tables.rs index a2809dbd26..d60e19a940 100644 --- a/crates/nu_plugin_query/src/web_tables.rs +++ b/crates/nu_plugin_query/src/web_tables.rs @@ -331,897 +331,891 @@ fn contains_str(slice: &[String], item: &str) -> bool { }) } -// #[cfg(test)] -// mod tests { -// use super::*; -// use crate::query_web::retrieve_tables; -// use indexmap::indexmap; -// use nu_protocol::Value; - -// const TABLE_EMPTY: &str = r#" -//
-// "#; - -// const TABLE_TH: &str = r#" -// -// -//
NameAge
-// "#; - -// const TABLE_TD: &str = r#" -// -// -//
NameAge
-// "#; - -// const TWO_TABLES_TD: &str = r#" -// -// -//
NameAge
-// -// -//
ProfessionCivil State
-// "#; - -// const TABLE_TH_TD: &str = r#" -// -// -// -//
NameAge
John20
-// "#; - -// const TWO_TABLES_TH_TD: &str = r#" -// -// -// -//
NameAge
John20
-// -// -// -//
ProfessionCivil State
MechanicSingle
-// "#; - -// const TABLE_TD_TD: &str = r#" -// -// -// -//
NameAge
John20
-// "#; - -// const TABLE_TH_TH: &str = r#" -// -// -// -//
NameAge
John20
-// "#; - -// const TABLE_COMPLEX: &str = r#" -// -// -// -// -// -// -//
NameAgeExtra
John20
May30foo
abcd
-// "#; - -// const TWO_TABLES_COMPLEX: &str = r#" -// -// -// foo -// -// -// -// -// -// -// -//
NameAgeExtra
John20
May30foo
abcd
-// -// -// -// -// -// -//
ProfessionCivil StateExtra
CarpenterSingle
MechanicMarriedbar
efgh
-// -// -// "#; - -// const HTML_NO_TABLE: &str = r#" -// -// -// foo -//

Hi.

-// -// "#; - -// const HTML_TWO_TABLES: &str = r#" -// -// -// foo -// -// -// -// -//
NameAge
John20
-// -// -// -//
NameWeight
John150
-// -// -// "#; - -// const HTML_TABLE_FRAGMENT: &str = r#" -// -// -// -//
NameAge
John20
-// -// -// "#; - -// const HTML_TABLE_WIKIPEDIA_WITH_COLUMN_NAMES: &str = r#" -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -//
Excel 2007 formats -//
Format -// Extension -// Description -//
Excel Workbook -// .xlsx -// The default Excel 2007 and later workbook format. In reality, a Zip compressed archive with a directory structure of XML text documents. -//Functions as the primary replacement for the former binary .xls format, although it does not support Excel macros for security reasons. Saving as .xlsx offers file size reduction over .xls[38] -//
Excel Macro-enabled Workbook -// .xlsm -// As Excel Workbook, but with macro support. -//
Excel Binary Workbook -// .xlsb -// As Excel Macro-enabled Workbook, but storing information in binary form rather than XML documents for opening and saving documents more quickly and efficiently. Intended especially for very large documents with tens of thousands of rows, and/or several hundreds -//of columns. This format is very useful for shrinking large Excel files as is often the case when doing data analysis. -//
Excel Macro-enabled Template -// .xltm -// A template document that forms a basis for actual workbooks, with macro support. The replacement for the old .xlt format. -//
Excel Add-in -// .xlam -// Excel add-in to add extra functionality and tools. Inherent macro support because of the file purpose. -//
-// "#; - -// const HTML_TABLE_WIKIPEDIA_COLUMNS_AS_ROWS: &str = r#" -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -// -//
-// Microsoft Excel -//
-// Microsoft Office Excel (2019–present).svg -//
-// Microsoft Excel.png -//
-// A simple -// line chart being -// created in Excel, running on -// Windows 10 -//
-//
-// Developer(s) -// -// Microsoft -//
-// Initial release -// -// 1987; 34 years ago (1987) -//
-// Stable release -// -//
-// 2103 (16.0.13901.20400) / April 13, 2021; 4 months ago (2021-04-13)[1] -//
-//
-// Operating system -// -// Microsoft Windows -//
-// Type -// -// Spreadsheet -//
-// License -// -// Trialware[2] -//
-// Website -// -// products.office.com/en-us/excel -//
-// "#; - -// #[test] -// fn test_find_first_none() { -// assert_eq!(None, Table::find_first("")); -// assert_eq!(None, Table::find_first("foo")); -// assert_eq!(None, Table::find_first(HTML_NO_TABLE)); -// } - -// #[test] -// fn test_find_first_empty() { -// let empty = Table { -// headers: HashMap::new(), -// data: Vec::new(), -// }; -// assert_eq!(Some(empty), Table::find_first(TABLE_EMPTY)); -// } - -// #[test] -// fn test_find_first_some() { -// assert!(Table::find_first(TABLE_TH).is_some()); -// assert!(Table::find_first(TABLE_TD).is_some()); -// } - -// #[test] -// fn test_find_by_id_none() { -// assert_eq!(None, Table::find_by_id("", "")); -// assert_eq!(None, Table::find_by_id("foo", "id")); -// assert_eq!(None, Table::find_by_id(HTML_NO_TABLE, "id")); - -// assert_eq!(None, Table::find_by_id(TABLE_EMPTY, "id")); -// assert_eq!(None, Table::find_by_id(TABLE_TH, "id")); -// assert_eq!(None, Table::find_by_id(TABLE_TH, "")); -// assert_eq!(None, Table::find_by_id(HTML_TWO_TABLES, "id")); -// } - -// #[test] -// fn test_find_by_id_some() { -// assert!(Table::find_by_id(HTML_TWO_TABLES, "first").is_some()); -// assert!(Table::find_by_id(HTML_TWO_TABLES, "second").is_some()); -// } - -// #[test] -// fn test_find_by_headers_empty() { -// let headers: [&str; 0] = []; - -// assert_eq!(None, Table::find_by_headers("", &headers)); -// assert_eq!(None, Table::find_by_headers("foo", &headers)); -// assert_eq!(None, Table::find_by_headers(HTML_NO_TABLE, &headers)); - -// assert!(Table::find_by_headers(TABLE_EMPTY, &headers).is_some()); -// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); -// } - -// #[test] -// fn test_find_by_headers_none() { -// let headers = ["Name", "Age"]; -// let bad_headers = ["Name", "BAD"]; - -// assert_eq!(None, Table::find_by_headers("", &headers)); -// assert_eq!(None, Table::find_by_headers("foo", &headers)); -// assert_eq!(None, Table::find_by_headers(HTML_NO_TABLE, &headers)); - -// assert_eq!(None, 
Table::find_by_headers(TABLE_EMPTY, &bad_headers)); -// assert_eq!(None, Table::find_by_headers(TABLE_TH, &bad_headers)); - -// assert_eq!(None, Table::find_by_headers(TABLE_TD, &headers)); -// assert_eq!(None, Table::find_by_headers(TABLE_TD, &bad_headers)); -// } - -// #[test] -// fn test_find_by_headers_some() { -// let headers: [&str; 0] = []; -// assert!(Table::find_by_headers(TABLE_TH, &headers).is_some()); -// assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some()); -// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); - -// let headers = ["Name"]; -// assert!(Table::find_by_headers(TABLE_TH, &headers).is_some()); -// assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some()); -// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); - -// let headers = ["Age", "Name"]; -// assert!(Table::find_by_headers(TABLE_TH, &headers).is_some()); -// assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some()); -// assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some()); -// } - -// #[test] -// fn test_find_first_incomplete_fragment() { -// assert!(Table::find_first(HTML_TABLE_FRAGMENT).is_some()); -// } - -// #[test] -// fn test_headers_empty() { -// let empty = HashMap::new(); -// assert_eq!(&empty, Table::find_first(TABLE_TD).unwrap().headers()); -// assert_eq!(&empty, Table::find_first(TABLE_TD_TD).unwrap().headers()); -// } - -// #[test] -// fn test_headers_nonempty() { -// let mut headers = HashMap::new(); -// headers.insert("Name".to_string(), 0); -// headers.insert("Age".to_string(), 1); - -// assert_eq!(&headers, Table::find_first(TABLE_TH).unwrap().headers()); -// assert_eq!(&headers, Table::find_first(TABLE_TH_TD).unwrap().headers()); -// assert_eq!(&headers, Table::find_first(TABLE_TH_TH).unwrap().headers()); - -// headers.insert("Extra".to_string(), 2); -// assert_eq!( -// &headers, -// Table::find_first(TABLE_COMPLEX).unwrap().headers() -// ); -// } - -// #[test] -// fn test_iter_empty() 
{ -// assert_eq!(0, Table::find_first(TABLE_EMPTY).unwrap().iter().count()); -// assert_eq!(0, Table::find_first(TABLE_TH).unwrap().iter().count()); -// } - -// #[test] -// fn test_iter_nonempty() { -// assert_eq!(1, Table::find_first(TABLE_TD).unwrap().iter().count()); -// assert_eq!(1, Table::find_first(TABLE_TH_TD).unwrap().iter().count()); -// assert_eq!(2, Table::find_first(TABLE_TD_TD).unwrap().iter().count()); -// assert_eq!(1, Table::find_first(TABLE_TH_TH).unwrap().iter().count()); -// assert_eq!(4, Table::find_first(TABLE_COMPLEX).unwrap().iter().count()); -// } - -// #[test] -// fn test_row_is_empty() { -// let table = Table::find_first(TABLE_TD).unwrap(); -// assert_eq!( -// vec![false], -// table.iter().map(|r| r.is_empty()).collect::>() -// ); - -// let table = Table::find_first(TABLE_COMPLEX).unwrap(); -// assert_eq!( -// vec![false, false, true, false], -// table.iter().map(|r| r.is_empty()).collect::>() -// ); -// } - -// #[test] -// fn test_row_len() { -// let table = Table::find_first(TABLE_TD).unwrap(); -// assert_eq!(vec![2], table.iter().map(|r| r.len()).collect::>()); - -// let table = Table::find_first(TABLE_COMPLEX).unwrap(); -// assert_eq!( -// vec![2, 3, 0, 4], -// table.iter().map(|r| r.len()).collect::>() -// ); -// } - -// #[test] -// fn test_row_len_two_tables() { -// let tables = Table::find_all_tables(HTML_TWO_TABLES).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// assert_eq!(vec![2], table_1.iter().map(|r| r.len()).collect::>()); -// assert_eq!(vec![2], table_2.iter().map(|r| r.len()).collect::>()); - -// let tables = Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// assert_eq!( -// vec![2, 3, 0, 4], -// table_1.iter().map(|r| r.len()).collect::>() -// ); -// assert_eq!( -// vec![2, 3, 0, 4], 
-// table_2.iter().map(|r| r.len()).collect::>() -// ); -// } - -// #[test] -// fn test_row_get_without_headers() { -// let table = Table::find_first(TABLE_TD).unwrap(); -// let mut iter = table.iter(); -// let row = iter.next().unwrap(); - -// assert_eq!(None, row.get("")); -// assert_eq!(None, row.get("foo")); -// assert_eq!(None, row.get("Name")); -// assert_eq!(None, row.get("Age")); - -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_row_get_with_headers() { -// let table = Table::find_first(TABLE_TH_TD).unwrap(); -// let mut iter = table.iter(); -// let row = iter.next().unwrap(); - -// assert_eq!(None, row.get("")); -// assert_eq!(None, row.get("foo")); -// assert_eq!(Some("John"), row.get("Name")); -// assert_eq!(Some("20"), row.get("Age")); - -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_row_get_complex() { -// let table = Table::find_first(TABLE_COMPLEX).unwrap(); -// let mut iter = table.iter(); - -// let row = iter.next().unwrap(); -// assert_eq!(Some("John"), row.get("Name")); -// assert_eq!(Some("20"), row.get("Age")); -// assert_eq!(None, row.get("Extra")); - -// let row = iter.next().unwrap(); -// assert_eq!(Some("May"), row.get("Name")); -// assert_eq!(Some("30"), row.get("Age")); -// assert_eq!(Some("foo"), row.get("Extra")); - -// let row = iter.next().unwrap(); -// assert_eq!(None, row.get("Name")); -// assert_eq!(None, row.get("Age")); -// assert_eq!(None, row.get("Extra")); - -// let row = iter.next().unwrap(); -// assert_eq!(Some("a"), row.get("Name")); -// assert_eq!(Some("b"), row.get("Age")); -// assert_eq!(Some("c"), row.get("Extra")); - -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_two_tables_row_get_complex() { -// let tables = Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// let mut iter_1 = table_1.iter(); -// let mut iter_2 = 
table_2.iter(); - -// let row_table_1 = iter_1.next().unwrap(); -// let row_table_2 = iter_2.next().unwrap(); -// assert_eq!(Some("John"), row_table_1.get("Name")); -// assert_eq!(Some("20"), row_table_1.get("Age")); -// assert_eq!(None, row_table_1.get("Extra")); -// assert_eq!(Some("Carpenter"), row_table_2.get("Profession")); -// assert_eq!(Some("Single"), row_table_2.get("Civil State")); -// assert_eq!(None, row_table_2.get("Extra")); - -// let row_table_1 = iter_1.next().unwrap(); -// let row_table_2 = iter_2.next().unwrap(); -// assert_eq!(Some("May"), row_table_1.get("Name")); -// assert_eq!(Some("30"), row_table_1.get("Age")); -// assert_eq!(Some("foo"), row_table_1.get("Extra")); -// assert_eq!(Some("Mechanic"), row_table_2.get("Profession")); -// assert_eq!(Some("Married"), row_table_2.get("Civil State")); -// assert_eq!(Some("bar"), row_table_2.get("Extra")); - -// let row_table_1 = iter_1.next().unwrap(); -// let row_table_2 = iter_2.next().unwrap(); -// assert_eq!(None, row_table_1.get("Name")); -// assert_eq!(None, row_table_1.get("Age")); -// assert_eq!(None, row_table_1.get("Extra")); -// assert_eq!(None, row_table_2.get("Name")); -// assert_eq!(None, row_table_2.get("Age")); -// assert_eq!(None, row_table_2.get("Extra")); - -// let row_table_1 = iter_1.next().unwrap(); -// let row_table_2 = iter_2.next().unwrap(); -// assert_eq!(Some("a"), row_table_1.get("Name")); -// assert_eq!(Some("b"), row_table_1.get("Age")); -// assert_eq!(Some("c"), row_table_1.get("Extra")); -// assert_eq!(Some("e"), row_table_2.get("Profession")); -// assert_eq!(Some("f"), row_table_2.get("Civil State")); -// assert_eq!(Some("g"), row_table_2.get("Extra")); - -// assert_eq!(None, iter_1.next()); -// assert_eq!(None, iter_2.next()); -// } - -// #[test] -// fn test_row_as_slice_without_headers() { -// let table = Table::find_first(TABLE_TD).unwrap(); -// let mut iter = table.iter(); - -// assert_eq!(&["Name", "Age"], iter.next().unwrap().as_slice()); -// assert_eq!(None, 
iter.next()); -// } - -// #[test] -// fn test_row_as_slice_without_headers_two_tables() { -// let tables = Table::find_all_tables(TWO_TABLES_TD).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// let mut iter_1 = table_1.iter(); -// let mut iter_2 = table_2.iter(); - -// assert_eq!(&["Name", "Age"], iter_1.next().unwrap().as_slice()); -// assert_eq!( -// &["Profession", "Civil State"], -// iter_2.next().unwrap().as_slice() -// ); -// assert_eq!(None, iter_1.next()); -// assert_eq!(None, iter_2.next()); -// } - -// #[test] -// fn test_row_as_slice_with_headers() { -// let table = Table::find_first(TABLE_TH_TD).unwrap(); -// let mut iter = table.iter(); - -// assert_eq!(&["John", "20"], iter.next().unwrap().as_slice()); -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_row_as_slice_with_headers_two_tables() { -// let tables = Table::find_all_tables(TWO_TABLES_TH_TD).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// let mut iter_1 = table_1.iter(); -// let mut iter_2 = table_2.iter(); - -// assert_eq!(&["John", "20"], iter_1.next().unwrap().as_slice()); -// assert_eq!(&["Mechanic", "Single"], iter_2.next().unwrap().as_slice()); -// assert_eq!(None, iter_1.next()); -// assert_eq!(None, iter_2.next()); -// } - -// #[test] -// fn test_row_as_slice_complex() { -// let table = Table::find_first(TABLE_COMPLEX).unwrap(); -// let mut iter = table.iter(); -// let empty: [&str; 0] = []; - -// assert_eq!(&["John", "20"], iter.next().unwrap().as_slice()); -// assert_eq!(&["May", "30", "foo"], iter.next().unwrap().as_slice()); -// assert_eq!(&empty, iter.next().unwrap().as_slice()); -// assert_eq!(&["a", "b", "c", "d"], iter.next().unwrap().as_slice()); -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_row_as_slice_complex_two_tables() { -// let tables = 
Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// let mut iter_1 = table_1.iter(); -// let mut iter_2 = table_2.iter(); -// let empty: [&str; 0] = []; - -// assert_eq!(&["John", "20"], iter_1.next().unwrap().as_slice()); -// assert_eq!(&["May", "30", "foo"], iter_1.next().unwrap().as_slice()); -// assert_eq!(&empty, iter_1.next().unwrap().as_slice()); -// assert_eq!(&["a", "b", "c", "d"], iter_1.next().unwrap().as_slice()); -// assert_eq!(None, iter_1.next()); -// assert_eq!(&["Carpenter", "Single"], iter_2.next().unwrap().as_slice()); -// assert_eq!( -// &["Mechanic", "Married", "bar"], -// iter_2.next().unwrap().as_slice() -// ); -// assert_eq!(&empty, iter_2.next().unwrap().as_slice()); -// assert_eq!(&["e", "f", "g", "h"], iter_2.next().unwrap().as_slice()); -// assert_eq!(None, iter_2.next()); -// } - -// #[test] -// fn test_row_iter_simple() { -// let table = Table::find_first(TABLE_TD).unwrap(); -// let row = table.iter().next().unwrap(); -// let mut iter = row.iter(); - -// assert_eq!(Some("Name"), iter.next().map(String::as_str)); -// assert_eq!(Some("Age"), iter.next().map(String::as_str)); -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_row_iter_simple_two_tables() { -// let tables = Table::find_all_tables(TWO_TABLES_TD).unwrap(); -// let mut tables_iter = tables.iter(); -// let table_1 = tables_iter.next().unwrap(); -// let table_2 = tables_iter.next().unwrap(); -// let row_1 = table_1.iter().next().unwrap(); -// let row_2 = table_2.iter().next().unwrap(); -// let mut iter_1 = row_1.iter(); -// let mut iter_2 = row_2.iter(); - -// assert_eq!(Some("Name"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("Age"), iter_1.next().map(String::as_str)); -// assert_eq!(None, iter_1.next()); -// assert_eq!(Some("Profession"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("Civil State"), 
iter_2.next().map(String::as_str)); -// assert_eq!(None, iter_2.next()); -// } - -// #[test] -// fn test_row_iter_complex() { -// let table = Table::find_first(TABLE_COMPLEX).unwrap(); -// let mut table_iter = table.iter(); - -// let row = table_iter.next().unwrap(); -// let mut iter = row.iter(); -// assert_eq!(Some("John"), iter.next().map(String::as_str)); -// assert_eq!(Some("20"), iter.next().map(String::as_str)); -// assert_eq!(None, iter.next()); - -// let row = table_iter.next().unwrap(); -// let mut iter = row.iter(); -// assert_eq!(Some("May"), iter.next().map(String::as_str)); -// assert_eq!(Some("30"), iter.next().map(String::as_str)); -// assert_eq!(Some("foo"), iter.next().map(String::as_str)); -// assert_eq!(None, iter.next()); - -// let row = table_iter.next().unwrap(); -// let mut iter = row.iter(); -// assert_eq!(None, iter.next()); - -// let row = table_iter.next().unwrap(); -// let mut iter = row.iter(); -// assert_eq!(Some("a"), iter.next().map(String::as_str)); -// assert_eq!(Some("b"), iter.next().map(String::as_str)); -// assert_eq!(Some("c"), iter.next().map(String::as_str)); -// assert_eq!(Some("d"), iter.next().map(String::as_str)); -// assert_eq!(None, iter.next()); -// } - -// #[test] -// fn test_row_iter_complex_two_tables() { -// let tables = Table::find_all_tables(TWO_TABLES_COMPLEX).unwrap(); -// let mut tables_iter = tables.iter(); -// let mut table_1 = tables_iter.next().unwrap().iter(); -// let mut table_2 = tables_iter.next().unwrap().iter(); - -// let row_1 = table_1.next().unwrap(); -// let row_2 = table_2.next().unwrap(); -// let mut iter_1 = row_1.iter(); -// let mut iter_2 = row_2.iter(); -// assert_eq!(Some("John"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("20"), iter_1.next().map(String::as_str)); -// assert_eq!(None, iter_1.next()); -// assert_eq!(Some("Carpenter"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("Single"), iter_2.next().map(String::as_str)); -// assert_eq!(None, iter_2.next()); - 
-// let row_1 = table_1.next().unwrap(); -// let row_2 = table_2.next().unwrap(); -// let mut iter_1 = row_1.iter(); -// let mut iter_2 = row_2.iter(); -// assert_eq!(Some("May"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("30"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("foo"), iter_1.next().map(String::as_str)); -// assert_eq!(None, iter_1.next()); -// assert_eq!(Some("Mechanic"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("Married"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("bar"), iter_2.next().map(String::as_str)); -// assert_eq!(None, iter_2.next()); - -// let row_1 = table_1.next().unwrap(); -// let row_2 = table_2.next().unwrap(); -// let mut iter_1 = row_1.iter(); -// let mut iter_2 = row_2.iter(); -// assert_eq!(None, iter_1.next()); -// assert_eq!(None, iter_2.next()); - -// let row_1 = table_1.next().unwrap(); -// let row_2 = table_2.next().unwrap(); -// let mut iter_1 = row_1.iter(); -// let mut iter_2 = row_2.iter(); -// assert_eq!(Some("a"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("b"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("c"), iter_1.next().map(String::as_str)); -// assert_eq!(Some("d"), iter_1.next().map(String::as_str)); -// assert_eq!(None, iter_1.next()); -// assert_eq!(Some("e"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("f"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("g"), iter_2.next().map(String::as_str)); -// assert_eq!(Some("h"), iter_2.next().map(String::as_str)); -// assert_eq!(None, iter_2.next()); -// } - -// #[test] -// fn test_wikipedia_swapped_rows_columns() { -// // empty columns -// let cols = nu_protocol::value::Value { -// value: nu_protocol::UntaggedValue::Primitive(nu_protocol::Primitive::String( -// "".to_string(), -// )), -// tag: nu_source::Tag::unknown(), -// }; - -// // this table is taken straight from wikipedia with no changes -// let table = retrieve_tables(HTML_TABLE_WIKIPEDIA_COLUMNS_AS_ROWS, &cols, 
true); - -// let expected = vec![UntaggedValue::row(indexmap! { -// "Stable release".to_string() => UntaggedValue::string("\n 2103 (16.0.13901.20400) / April\u{a0}13, 2021; 4 months ago\u{a0}(2021-04-13)[1]\n ").into(), -// "Developer(s)".to_string() => UntaggedValue::string("Microsoft").into(), -// "Operating system".to_string() => UntaggedValue::string("Microsoft Windows").into(), -// "Type".to_string() => UntaggedValue::string("Spreadsheet").into(), -// "License".to_string() => UntaggedValue::string("Trialware[2]").into(), -// "".to_string() => UntaggedValue::string("").into(), -// "Website".to_string() => UntaggedValue::string("products.office.com/en-us/excel").into(), -// "Initial release".to_string() => UntaggedValue::string("1987; 34\u{a0}years ago\u{a0}(1987)").into(), -// }).into()]; - -// assert_eq!(table, expected); -// } - -// #[test] -// fn test_wikipedia_table_with_column_headers() { -// let cols = UntaggedValue::table(&[ -// UntaggedValue::string("Format".to_string()).into(), -// UntaggedValue::string("Extension".to_string()).into(), -// UntaggedValue::string("Description".to_string()).into(), -// ]) -// .into(); - -// // this table is taken straight from wikipedia with no changes -// let table = retrieve_tables(HTML_TABLE_WIKIPEDIA_WITH_COLUMN_NAMES, &cols, true); -// let expected = vec![ -// UntaggedValue::row(indexmap! { -// "Format".to_string() => UntaggedValue::string("Excel Workbook").into(), -// "Extension".to_string() => UntaggedValue::string(".xlsx").into(), -// "Description".to_string() => UntaggedValue::string("The default Excel 2007 and later workbook format. In reality, a Zip compressed archive with a directory structure of XML text documents. Functions as the primary -// -//replacement for the former binary .xls format, although it does not support Excel macros for security reasons. Saving as .xlsx offers file size reduction over .xls[38]").into(), -// }).into(), -// UntaggedValue::row(indexmap! 
{ -// "Format".to_string() => UntaggedValue::string("Excel Macro-enabled Workbook").into(), -// "Extension".to_string() => UntaggedValue::string(".xlsm").into(), -// "Description".to_string() => UntaggedValue::string("As Excel Workbook, but with macro support.").into(), -// }).into(), -// UntaggedValue::row(indexmap! { -// "Format".to_string() => UntaggedValue::string("Excel Binary Workbook").into(), -// "Extension".to_string() => UntaggedValue::string(".xlsb").into(), -// "Description".to_string() => UntaggedValue::string("As Excel Macro-enabled Workbook, but storing information in binary form rather than XML documents for opening and saving documents more quickly and efficiently. Intended especially for very large documents with tens of thousands of rows, and/or several hundreds of columns. This format is very useful for shrinking large Excel files as is often the case when doing data analysis.").into(), -// }).into(), -// UntaggedValue::row(indexmap! { -// "Format".to_string() => UntaggedValue::string("Excel Macro-enabled Template").into(), -// "Extension".to_string() => UntaggedValue::string(".xltm").into(), -// "Description".to_string() => UntaggedValue::string("A template document that forms a basis for actual workbooks, with macro support. The replacement for the old .xlt format.").into(), -// }).into(), -// UntaggedValue::row(indexmap! { -// "Format".to_string() => UntaggedValue::string("Excel Add-in").into(), -// "Extension".to_string() => UntaggedValue::string(".xlam").into(), -// "Description".to_string() => UntaggedValue::string("Excel add-in to add extra functionality and tools. Inherent macro support because of the file purpose.").into(), -// }).into(), -// ]; - -// assert_eq!(table, expected); -// } -// } +#[cfg(test)] +mod tests { + use super::*; + // use crate::query_web::retrieve_tables; + // use indexmap::indexmap; + // use nu_protocol::Value; + + const TABLE_EMPTY: &str = r#" +
+"#; + + const TABLE_TH: &str = r#" + + +
NameAge
+"#; + + const TABLE_TD: &str = r#" + + +
NameAge
+"#; + + const TWO_TABLES_TD: &str = r#" + + +
NameAge
+ + +
ProfessionCivil State
+"#; + + const TABLE_TH_TD: &str = r#" + + + +
NameAge
John20
+"#; + + const TWO_TABLES_TH_TD: &str = r#" + + + +
NameAge
John20
+ + + +
ProfessionCivil State
MechanicSingle
+"#; + + const TABLE_TD_TD: &str = r#" + + + +
NameAge
John20
+"#; + + const TABLE_TH_TH: &str = r#" + + + +
NameAge
John20
+"#; + + const TABLE_COMPLEX: &str = r#" + + + + + + +
NameAgeExtra
John20
May30foo
abcd
+"#; + + const TWO_TABLES_COMPLEX: &str = r#" + + + foo + + + + + + + +
NameAgeExtra
John20
May30foo
abcd
+ + + + + + +
ProfessionCivil StateExtra
CarpenterSingle
MechanicMarriedbar
efgh
+ + +"#; + + const HTML_NO_TABLE: &str = r#" + + + foo +

Hi.

+ +"#; + + const HTML_TWO_TABLES: &str = r#" + + + foo + + + + +
NameAge
John20
+ + + +
NameWeight
John150
+ + +"#; + + const HTML_TABLE_FRAGMENT: &str = r#" + + + +
NameAge
John20
+ + +"#; + + /* + const HTML_TABLE_WIKIPEDIA_WITH_COLUMN_NAMES: &str = r#" + + + + + + + + + + + + + < + + <.xlsb + <> + + + + + + +
Excel 2007 formats +
Format + Extension + Description +
Excel Workbook + .xlsx + The default Excel 2007 and later workbook format. In reality, a Zip compressed archive with a directory structure of XML text documents.Functions as the primary replacement for the former binary .xls format, although it does not support Excel macroor security reasons. Saving as .xlsx offers file size reduction over .xls[38] +
Excel ro-enabled Workbook + .xlsm
Excel Macro-enabled Template + .xltm + A template document that forms a basis for actual workbooks, with macro support. The replacement for the o.xlt format. +
Excel -in + .xlam
+ " + ct HTML_TABLE_WIKIPEDIA_COLUMNS_AS_ROWS: &str = r#" + + + Mosoft Excel + + + < + d colspan="2" class="infobox-image"> + Microsoft Office Excel (2019–present).svg + /tr> tr> + + + < class="infobox-caption"> + simple + href="/wiki/Line_chart" title="Line chart">line chart being + created in Excel, running on + Windows 10 + /div> + d> + + a href="/wiki/Programmer" title="Programmer">Developer(s) + h> + class="infobox-data"> + a href="/wiki/Microsoft" title="Microsoft">Microsoft + d> + + nitial release + h> + class="infobox-data"> + 987; 34 years ago (1987) + + colspan="2" class="infobox-full-data"> + + a + href="/wiki/Software_release_life_cycle" + title="Software release life cycle" + >Stable release + + +
+ 2103 (16.0.13901.20400) / April 13, 2021; 4 months ago (2021-04-13)[1] +
+
+
+
+
+
+
+
+ Operating system
+
+
+ Microsoft Windows
+
+
+
+
+ Type
+
+
+ Spreadsheet
+
+
+
+
+ License
+
+
+ Trialware[2]
+
+
+
+
+ Website
+
+
+ products.office.com/en-us/excel
+
+
+
+
+    "#;
+    */
+
+    // `find_first` yields `None` for empty input, plain text, and HTML without a table.
+    #[test]
+    fn test_find_first_none() {
+        assert_eq!(None, WebTable::find_first(""));
+        assert_eq!(None, WebTable::find_first("foo"));
+        assert_eq!(None, WebTable::find_first(HTML_NO_TABLE));
+    }
+
+    // An empty `<table>` is still found; it has no headers and no data rows.
+    #[test]
+    fn test_find_first_empty() {
+        let empty = WebTable {
+            headers: HashMap::new(),
+            data: Vec::new(),
+        };
+        assert_eq!(Some(empty), WebTable::find_first(TABLE_EMPTY));
+    }
+
+    // Tables made of only header cells or only data cells are both found.
+    #[test]
+    fn test_find_first_some() {
+        assert!(WebTable::find_first(TABLE_TH).is_some());
+        assert!(WebTable::find_first(TABLE_TD).is_some());
+    }
+
+    // `find_by_id` yields `None` when no table carries the requested id.
+    #[test]
+    fn test_find_by_id_none() {
+        assert_eq!(None, WebTable::find_by_id("", ""));
+        assert_eq!(None, WebTable::find_by_id("foo", "id"));
+        assert_eq!(None, WebTable::find_by_id(HTML_NO_TABLE, "id"));
+        assert_eq!(None, WebTable::find_by_id(TABLE_EMPTY, "id"));
+        assert_eq!(None, WebTable::find_by_id(TABLE_TH, "id"));
+        assert_eq!(None, WebTable::find_by_id(TABLE_TH, ""));
+        assert_eq!(None, WebTable::find_by_id(HTML_TWO_TABLES, "id"));
+    }
+
+    // Both tables of the two-table fixture can be looked up by their ids.
+    #[test]
+    fn test_find_by_id_some() {
+        assert!(WebTable::find_by_id(HTML_TWO_TABLES, "first").is_some());
+        assert!(WebTable::find_by_id(HTML_TWO_TABLES, "second").is_some());
+    }
+
+    // An empty header list matches any table, but still nothing when there is no table.
+    #[test]
+    fn test_find_by_headers_empty() {
+        let headers: [&str; 0] = [];
+
+        assert_eq!(None, WebTable::find_by_headers("", &headers));
+        assert_eq!(None, WebTable::find_by_headers("foo", &headers));
+        assert_eq!(None, WebTable::find_by_headers(HTML_NO_TABLE, &headers));
+
+        assert!(WebTable::find_by_headers(TABLE_EMPTY, &headers).is_some());
+        assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some());
+    }
+
+    // `find_by_headers` yields `None` when a requested header is absent or the table has no headers.
+    #[test]
+    fn test_find_by_headers_none() {
+        let headers = ["Name", "Age"];
+        let bad_headers = ["Name", "BAD"];
+
+        assert_eq!(None, WebTable::find_by_headers("", &headers));
+        assert_eq!(None, WebTable::find_by_headers("foo", &headers));
+        assert_eq!(None, WebTable::find_by_headers(HTML_NO_TABLE, &headers));
+
+        assert_eq!(None, WebTable::find_by_headers(TABLE_EMPTY, &bad_headers));
+        assert_eq!(None, WebTable::find_by_headers(TABLE_TH, &bad_headers));
+
+        assert_eq!(None, WebTable::find_by_headers(TABLE_TD, &headers));
+        assert_eq!(None, WebTable::find_by_headers(TABLE_TD, &bad_headers));
+    }
+
+    // Header matching succeeds for an empty list, a subset, and a reordered list of headers.
+    #[test]
+    fn test_find_by_headers_some() {
+        let headers: [&str; 0] = [];
+        assert!(WebTable::find_by_headers(TABLE_TH, &headers).is_some());
+        assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers).is_some());
+        assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some());
+
+        let headers = ["Name"];
+        assert!(WebTable::find_by_headers(TABLE_TH, &headers).is_some());
+        assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers).is_some());
+        assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some());
+
+        let headers = ["Age", "Name"];
+        assert!(WebTable::find_by_headers(TABLE_TH, &headers).is_some());
+        assert!(WebTable::find_by_headers(TABLE_TH_TD, &headers).is_some());
+        assert!(WebTable::find_by_headers(HTML_TWO_TABLES, &headers).is_some());
+    }
+
+    // A table is still found in the `HTML_TABLE_FRAGMENT` fixture.
+    #[test]
+    fn test_find_first_incomplete_fragment() {
+        assert!(WebTable::find_first(HTML_TABLE_FRAGMENT).is_some());
+    }
+
+    // Tables without header cells expose an empty header map.
+    #[test]
+    fn test_headers_empty() {
+        let empty = HashMap::new();
+        assert_eq!(&empty, WebTable::find_first(TABLE_TD).unwrap().headers());
+        assert_eq!(&empty, WebTable::find_first(TABLE_TD_TD).unwrap().headers());
+    }
+
+    // Header names map to their zero-based column positions.
+    #[test]
+    fn test_headers_nonempty() {
+        let mut headers = HashMap::new();
+        headers.insert("Name".to_string(), 0);
+        headers.insert("Age".to_string(), 1);
+
+        assert_eq!(&headers, WebTable::find_first(TABLE_TH).unwrap().headers());
+        assert_eq!(
+            &headers,
+            WebTable::find_first(TABLE_TH_TD).unwrap().headers()
+        );
+        assert_eq!(
+            &headers,
+            WebTable::find_first(TABLE_TH_TH).unwrap().headers()
+        );
+
+        headers.insert("Extra".to_string(), 2);
+        assert_eq!(
+            &headers,
+            WebTable::find_first(TABLE_COMPLEX).unwrap().headers()
+        );
+    }
+
+    // An empty table and a header-only table both iterate over zero rows.
+    #[test]
+    fn test_iter_empty() {
+        assert_eq!(0, WebTable::find_first(TABLE_EMPTY).unwrap().iter().count());
+        assert_eq!(0, WebTable::find_first(TABLE_TH).unwrap().iter().count());
+    }
+
+    // Row counts for each fixture that has at least one data row.
+    #[test]
+    fn test_iter_nonempty() {
+        assert_eq!(1, WebTable::find_first(TABLE_TD).unwrap().iter().count());
+        assert_eq!(1, WebTable::find_first(TABLE_TH_TD).unwrap().iter().count());
+        assert_eq!(2, WebTable::find_first(TABLE_TD_TD).unwrap().iter().count());
+        assert_eq!(1, WebTable::find_first(TABLE_TH_TH).unwrap().iter().count());
+        assert_eq!(
+            4,
+            WebTable::find_first(TABLE_COMPLEX).unwrap().iter().count()
+        );
+    }
+
+    // `is_empty` is true only for the cell-less third row of the complex table.
+    #[test]
+    fn test_row_is_empty() {
+        let table = WebTable::find_first(TABLE_TD).unwrap();
+        assert_eq!(
+            vec![false],
+            table.iter().map(|r| r.is_empty()).collect::<Vec<_>>()
+        );
+
+        let table = WebTable::find_first(TABLE_COMPLEX).unwrap();
+        assert_eq!(
+            vec![false, false, true, false],
+            table.iter().map(|r| r.is_empty()).collect::<Vec<_>>()
+        );
+    }
+
+    // `len` reports the number of cells in each row, which may differ from the header count.
+    #[test]
+    fn test_row_len() {
+        let table = WebTable::find_first(TABLE_TD).unwrap();
+        assert_eq!(vec![2], table.iter().map(|r| r.len()).collect::<Vec<_>>());
+
+        let table = WebTable::find_first(TABLE_COMPLEX).unwrap();
+        assert_eq!(
+            vec![2, 3, 0, 4],
+            table.iter().map(|r| r.len()).collect::<Vec<_>>()
+        );
+    }
+
+    // Same row-length checks, applied to each table returned by `find_all_tables`.
+    #[test]
+    fn test_row_len_two_tables() {
+        let tables = WebTable::find_all_tables(HTML_TWO_TABLES).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        assert_eq!(vec![2], table_1.iter().map(|r| r.len()).collect::<Vec<_>>());
+        assert_eq!(vec![2], table_2.iter().map(|r| r.len()).collect::<Vec<_>>());
+
+        let tables = WebTable::find_all_tables(TWO_TABLES_COMPLEX).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        assert_eq!(
+            vec![2, 3, 0, 4],
+            table_1.iter().map(|r| r.len()).collect::<Vec<_>>()
+        );
+        assert_eq!(
+            vec![2, 3, 0, 4],
+            table_2.iter().map(|r| r.len()).collect::<Vec<_>>()
+        );
+    }
+
+    // Without headers, `Row::get` returns `None` for every key.
+    #[test]
+    fn test_row_get_without_headers() {
+        let table = WebTable::find_first(TABLE_TD).unwrap();
+        let mut iter = table.iter();
+        let row = iter.next().unwrap();
+
+        assert_eq!(None, row.get(""));
+        assert_eq!(None, row.get("foo"));
+        assert_eq!(None, row.get("Name"));
+        assert_eq!(None, row.get("Age"));
+
+        assert_eq!(None, iter.next());
+    }
+
+    // With headers, `Row::get` resolves known header names and rejects unknown ones.
+    #[test]
+    fn test_row_get_with_headers() {
+        let table = WebTable::find_first(TABLE_TH_TD).unwrap();
+        let mut iter = table.iter();
+        let row = iter.next().unwrap();
+
+        assert_eq!(None, row.get(""));
+        assert_eq!(None, row.get("foo"));
+        assert_eq!(Some("John"), row.get("Name"));
+        assert_eq!(Some("20"), row.get("Age"));
+
+        assert_eq!(None, iter.next());
+    }
+
+    // Rows shorter than the header list return `None` for the missing columns.
+    #[test]
+    fn test_row_get_complex() {
+        let table = WebTable::find_first(TABLE_COMPLEX).unwrap();
+        let mut iter = table.iter();
+
+        let row = iter.next().unwrap();
+        assert_eq!(Some("John"), row.get("Name"));
+        assert_eq!(Some("20"), row.get("Age"));
+        assert_eq!(None, row.get("Extra"));
+
+        let row = iter.next().unwrap();
+        assert_eq!(Some("May"), row.get("Name"));
+        assert_eq!(Some("30"), row.get("Age"));
+        assert_eq!(Some("foo"), row.get("Extra"));
+
+        let row = iter.next().unwrap();
+        assert_eq!(None, row.get("Name"));
+        assert_eq!(None, row.get("Age"));
+        assert_eq!(None, row.get("Extra"));
+
+        let row = iter.next().unwrap();
+        assert_eq!(Some("a"), row.get("Name"));
+        assert_eq!(Some("b"), row.get("Age"));
+        assert_eq!(Some("c"), row.get("Extra"));
+
+        assert_eq!(None, iter.next());
+    }
+
+    // Header lookups on two tables walked in lockstep; each table uses its own header names.
+    #[test]
+    fn test_two_tables_row_get_complex() {
+        let tables = WebTable::find_all_tables(TWO_TABLES_COMPLEX).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        let mut iter_1 = table_1.iter();
+        let mut iter_2 = table_2.iter();
+
+        let row_table_1 = iter_1.next().unwrap();
+        let row_table_2 = iter_2.next().unwrap();
+        assert_eq!(Some("John"), row_table_1.get("Name"));
+        assert_eq!(Some("20"), row_table_1.get("Age"));
+        assert_eq!(None, row_table_1.get("Extra"));
+        assert_eq!(Some("Carpenter"), row_table_2.get("Profession"));
+        assert_eq!(Some("Single"), row_table_2.get("Civil State"));
+        assert_eq!(None, row_table_2.get("Extra"));
+
+        let row_table_1 = iter_1.next().unwrap();
+        let row_table_2 = iter_2.next().unwrap();
+        assert_eq!(Some("May"), row_table_1.get("Name"));
+        assert_eq!(Some("30"), row_table_1.get("Age"));
+        assert_eq!(Some("foo"), row_table_1.get("Extra"));
+        assert_eq!(Some("Mechanic"), row_table_2.get("Profession"));
+        assert_eq!(Some("Married"), row_table_2.get("Civil State"));
+        assert_eq!(Some("bar"), row_table_2.get("Extra"));
+
+        // Third row of each table has no cells. Table 2 is probed with its own
+        // headers so the check exercises the empty row, not an unknown key.
+        let row_table_1 = iter_1.next().unwrap();
+        let row_table_2 = iter_2.next().unwrap();
+        assert_eq!(None, row_table_1.get("Name"));
+        assert_eq!(None, row_table_1.get("Age"));
+        assert_eq!(None, row_table_1.get("Extra"));
+        assert_eq!(None, row_table_2.get("Profession"));
+        assert_eq!(None, row_table_2.get("Civil State"));
+        assert_eq!(None, row_table_2.get("Extra"));
+
+        let row_table_1 = iter_1.next().unwrap();
+        let row_table_2 = iter_2.next().unwrap();
+        assert_eq!(Some("a"), row_table_1.get("Name"));
+        assert_eq!(Some("b"), row_table_1.get("Age"));
+        assert_eq!(Some("c"), row_table_1.get("Extra"));
+        assert_eq!(Some("e"), row_table_2.get("Profession"));
+        assert_eq!(Some("f"), row_table_2.get("Civil State"));
+        assert_eq!(Some("g"), row_table_2.get("Extra"));
+
+        assert_eq!(None, iter_1.next());
+        assert_eq!(None, iter_2.next());
+    }
+
+    // In a `<td>`-only table the first row is ordinary data and shows up in `as_slice`.
+    #[test]
+    fn test_row_as_slice_without_headers() {
+        let table = WebTable::find_first(TABLE_TD).unwrap();
+        let mut iter = table.iter();
+
+        assert_eq!(&["Name", "Age"], iter.next().unwrap().as_slice());
+        assert_eq!(None, iter.next());
+    }
+
+    // Same as above for both tables returned by `find_all_tables`.
+    #[test]
+    fn test_row_as_slice_without_headers_two_tables() {
+        let tables = WebTable::find_all_tables(TWO_TABLES_TD).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        let mut iter_1 = table_1.iter();
+        let mut iter_2 = table_2.iter();
+
+        assert_eq!(&["Name", "Age"], iter_1.next().unwrap().as_slice());
+        assert_eq!(
+            &["Profession", "Civil State"],
+            iter_2.next().unwrap().as_slice()
+        );
+        assert_eq!(None, iter_1.next());
+        assert_eq!(None, iter_2.next());
+    }
+
+    // When a header row exists it is not yielded as data; only the `<td>` row is.
+    #[test]
+    fn test_row_as_slice_with_headers() {
+        let table = WebTable::find_first(TABLE_TH_TD).unwrap();
+        let mut iter = table.iter();
+
+        assert_eq!(&["John", "20"], iter.next().unwrap().as_slice());
+        assert_eq!(None, iter.next());
+    }
+
+    // Same as above for both tables returned by `find_all_tables`.
+    #[test]
+    fn test_row_as_slice_with_headers_two_tables() {
+        let tables = WebTable::find_all_tables(TWO_TABLES_TH_TD).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        let mut iter_1 = table_1.iter();
+        let mut iter_2 = table_2.iter();
+
+        assert_eq!(&["John", "20"], iter_1.next().unwrap().as_slice());
+        assert_eq!(&["Mechanic", "Single"], iter_2.next().unwrap().as_slice());
+        assert_eq!(None, iter_1.next());
+        assert_eq!(None, iter_2.next());
+    }
+
+    // `as_slice` returns every cell of a row, including rows shorter or longer than the headers.
+    #[test]
+    fn test_row_as_slice_complex() {
+        let table = WebTable::find_first(TABLE_COMPLEX).unwrap();
+        let mut iter = table.iter();
+        let empty: [&str; 0] = [];
+
+        assert_eq!(&["John", "20"], iter.next().unwrap().as_slice());
+        assert_eq!(&["May", "30", "foo"], iter.next().unwrap().as_slice());
+        assert_eq!(&empty, iter.next().unwrap().as_slice());
+        assert_eq!(&["a", "b", "c", "d"], iter.next().unwrap().as_slice());
+        assert_eq!(None, iter.next());
+    }
+
+    // Same as above for both tables returned by `find_all_tables`.
+    #[test]
+    fn test_row_as_slice_complex_two_tables() {
+        let tables = WebTable::find_all_tables(TWO_TABLES_COMPLEX).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        let mut iter_1 = table_1.iter();
+        let mut iter_2 = table_2.iter();
+        let empty: [&str; 0] = [];
+
+        assert_eq!(&["John", "20"], iter_1.next().unwrap().as_slice());
+        assert_eq!(&["May", "30", "foo"], iter_1.next().unwrap().as_slice());
+        assert_eq!(&empty, iter_1.next().unwrap().as_slice());
+        assert_eq!(&["a", "b", "c", "d"], iter_1.next().unwrap().as_slice());
+        assert_eq!(None, iter_1.next());
+        assert_eq!(&["Carpenter", "Single"], iter_2.next().unwrap().as_slice());
+        assert_eq!(
+            &["Mechanic", "Married", "bar"],
+            iter_2.next().unwrap().as_slice()
+        );
+        assert_eq!(&empty, iter_2.next().unwrap().as_slice());
+        assert_eq!(&["e", "f", "g", "h"], iter_2.next().unwrap().as_slice());
+        assert_eq!(None, iter_2.next());
+    }
+
+    // `Row::iter` walks the cells of a row in order.
+    #[test]
+    fn test_row_iter_simple() {
+        let table = WebTable::find_first(TABLE_TD).unwrap();
+        let row = table.iter().next().unwrap();
+        let mut iter = row.iter();
+
+        assert_eq!(Some("Name"), iter.next().map(String::as_str));
+        assert_eq!(Some("Age"), iter.next().map(String::as_str));
+        assert_eq!(None, iter.next());
+    }
+
+    // Same as above for both tables returned by `find_all_tables`.
+    #[test]
+    fn test_row_iter_simple_two_tables() {
+        let tables = WebTable::find_all_tables(TWO_TABLES_TD).unwrap();
+        let mut tables_iter = tables.iter();
+        let table_1 = tables_iter.next().unwrap();
+        let table_2 = tables_iter.next().unwrap();
+        let row_1 = table_1.iter().next().unwrap();
+        let row_2 = table_2.iter().next().unwrap();
+        let mut iter_1 = row_1.iter();
+        let mut iter_2 = row_2.iter();
+
+        assert_eq!(Some("Name"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("Age"), iter_1.next().map(String::as_str));
+        assert_eq!(None, iter_1.next());
+        assert_eq!(Some("Profession"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("Civil State"), iter_2.next().map(String::as_str));
+        assert_eq!(None, iter_2.next());
+    }
+
+    // Cell iteration over rows of differing lengths, including the empty row.
+    #[test]
+    fn test_row_iter_complex() {
+        let table = WebTable::find_first(TABLE_COMPLEX).unwrap();
+        let mut table_iter = table.iter();
+
+        let row = table_iter.next().unwrap();
+        let mut iter = row.iter();
+        assert_eq!(Some("John"), iter.next().map(String::as_str));
+        assert_eq!(Some("20"), iter.next().map(String::as_str));
+        assert_eq!(None, iter.next());
+
+        let row = table_iter.next().unwrap();
+        let mut iter = row.iter();
+        assert_eq!(Some("May"), iter.next().map(String::as_str));
+        assert_eq!(Some("30"), iter.next().map(String::as_str));
+        assert_eq!(Some("foo"), iter.next().map(String::as_str));
+        assert_eq!(None, iter.next());
+
+        let row = table_iter.next().unwrap();
+        let mut iter = row.iter();
+        assert_eq!(None, iter.next());
+
+        let row = table_iter.next().unwrap();
+        let mut iter = row.iter();
+        assert_eq!(Some("a"), iter.next().map(String::as_str));
+        assert_eq!(Some("b"), iter.next().map(String::as_str));
+        assert_eq!(Some("c"), iter.next().map(String::as_str));
+        assert_eq!(Some("d"), iter.next().map(String::as_str));
+        assert_eq!(None, iter.next());
+    }
+
+    // Same as above for both tables returned by `find_all_tables`, walked in lockstep.
+    #[test]
+    fn test_row_iter_complex_two_tables() {
+        let tables = WebTable::find_all_tables(TWO_TABLES_COMPLEX).unwrap();
+        let mut tables_iter = tables.iter();
+        let mut table_1 = tables_iter.next().unwrap().iter();
+        let mut table_2 = tables_iter.next().unwrap().iter();
+
+        let row_1 = table_1.next().unwrap();
+        let row_2 = table_2.next().unwrap();
+        let mut iter_1 = row_1.iter();
+        let mut iter_2 = row_2.iter();
+        assert_eq!(Some("John"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("20"), iter_1.next().map(String::as_str));
+        assert_eq!(None, iter_1.next());
+        assert_eq!(Some("Carpenter"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("Single"), iter_2.next().map(String::as_str));
+        assert_eq!(None, iter_2.next());
+
+        let row_1 = table_1.next().unwrap();
+        let row_2 = table_2.next().unwrap();
+        let mut iter_1 = row_1.iter();
+        let mut iter_2 = row_2.iter();
+        assert_eq!(Some("May"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("30"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("foo"), iter_1.next().map(String::as_str));
+        assert_eq!(None, iter_1.next());
+        assert_eq!(Some("Mechanic"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("Married"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("bar"), iter_2.next().map(String::as_str));
+        assert_eq!(None, iter_2.next());
+
+        let row_1 = table_1.next().unwrap();
+        let row_2 = table_2.next().unwrap();
+        let mut iter_1 = row_1.iter();
+        let mut iter_2 = row_2.iter();
+        assert_eq!(None, iter_1.next());
+        assert_eq!(None, iter_2.next());
+
+        let row_1 = table_1.next().unwrap();
+        let row_2 = table_2.next().unwrap();
+        let mut iter_1 = row_1.iter();
+        let mut iter_2 = row_2.iter();
+        assert_eq!(Some("a"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("b"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("c"), iter_1.next().map(String::as_str));
+        assert_eq!(Some("d"), iter_1.next().map(String::as_str));
+        assert_eq!(None, iter_1.next());
+        assert_eq!(Some("e"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("f"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("g"), iter_2.next().map(String::as_str));
+        assert_eq!(Some("h"), iter_2.next().map(String::as_str));
+        assert_eq!(None, iter_2.next());
+    }
+}
+/*
+    #[test]
+    fn test_wikipedia_swapped_rows_columns() {
+        // empty columns
+        let cols = nu_protocol::value::Value {
+            value: nu_protocol::UntaggedValue::Primitive(nu_protocol::Primitive::String(
+                "".to_string(),
+            )),
+            tag: nu_source::Tag::unknown(),
+        };
+
+        // this table is taken straight from wikipedia with no changes
+        let table = retrieve_tables(HTML_TABLE_WIKIPEDIA_COLUMNS_AS_ROWS, &cols, true);
+
+        let expected = vec![UntaggedValue::row(indexmap! {
+            "Stable release".to_string() => UntaggedValue::string("\n 2103 (16.0.13901.20400) / April\u{a0}13, 2021; 4 months ago\u{a0}(2021-04-13)[1]\n ").into(),
+            "Developer(s)".to_string() => UntaggedValue::string("Microsoft").into(),
+            "Operating system".to_string() => UntaggedValue::string("Microsoft Windows").into(),
+            "Type".to_string() => UntaggedValue::string("Spreadsheet").into(),
+            "License".to_string() => UntaggedValue::string("Trialware[2]").into(),
+            "".to_string() => UntaggedValue::string("").into(),
+            "Website".to_string() => UntaggedValue::string("products.office.com/en-us/excel").into(),
+            "Initial release".to_string() => UntaggedValue::string("1987; 34\u{a0}years ago\u{a0}(1987)").into(),
+        }).into()];
+
+        assert_eq!(table, expected);
+    }
+
+    #[test]
+    fn test_wikipedia_table_with_column_headers() {
+        let cols = UntaggedValue::table(&[
+            UntaggedValue::string("Format".to_string()).into(),
+            UntaggedValue::string("Extension".to_string()).into(),
+            UntaggedValue::string("Description".to_string()).into(),
+        ])
+        .into();
+
+        // this table is taken straight from wikipedia with no changes
+        let table = retrieve_tables(HTML_TABLE_WIKIPEDIA_WITH_COLUMN_NAMES, &cols, true);
+        let expected = vec![
+            UntaggedValue::row(indexmap! {
+                "Format".to_string() => UntaggedValue::string("Excel Workbook").into(),
+                "Extension".to_string() => UntaggedValue::string(".xlsx").into(),
+                "Description".to_string() => UntaggedValue::string("The default Excel 2007 and later workbook format. In reality, a Zip compressed archive with a directory structure of XML text documents. Functions as the primary
+
+ //replacement for the former binary .xls format, although it does not support Excel macros for security reasons. Saving as .xlsx offers file size reduction over .xls[38]").into(),
+            }).into(),
+            UntaggedValue::row(indexmap! {
+                "Format".to_string() => UntaggedValue::string("Excel Macro-enabled Workbook").into(),
+                "Extension".to_string() => UntaggedValue::string(".xlsm").into(),
+                "Description".to_string() => UntaggedValue::string("As Excel Workbook, but with macro support.").into(),
+            }).into(),
+            UntaggedValue::row(indexmap! {
+                "Format".to_string() => UntaggedValue::string("Excel Binary Workbook").into(),
+                "Extension".to_string() => UntaggedValue::string(".xlsb").into(),
+                "Description".to_string() => UntaggedValue::string("As Excel Macro-enabled Workbook, but storing information in binary form rather than XML documents for opening and saving documents more quickly and efficiently. Intended especially for very large documents with tens of thousands of rows, and/or several hundreds of columns. This format is very useful for shrinking large Excel files as is often the case when doing data analysis.").into(),
+            }).into(),
+            UntaggedValue::row(indexmap! {
+                "Format".to_string() => UntaggedValue::string("Excel Macro-enabled Template").into(),
+                "Extension".to_string() => UntaggedValue::string(".xltm").into(),
+                "Description".to_string() => UntaggedValue::string("A template document that forms a basis for actual workbooks, with macro support. The replacement for the old .xlt format.").into(),
+            }).into(),
+            UntaggedValue::row(indexmap! {
+                "Format".to_string() => UntaggedValue::string("Excel Add-in").into(),
+                "Extension".to_string() => UntaggedValue::string(".xlam").into(),
+                "Description".to_string() => UntaggedValue::string("Excel add-in to add extra functionality and tools. Inherent macro support because of the file purpose.").into(),
+            }).into(),
+        ];
+
+        assert_eq!(table, expected);
+    }
+*/