Expose flag truncate-ragged-lines in polars open (#13939)

# Description
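Introduces a new switch, `--truncate-ragged-lines`, for `polars open`. When it
is set, lines in a CSV file that are longer than the schema are truncated. The
switch is read once in `from_csv` and passed to both CSV reader builders there.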

# User-Facing Changes
- Adds the `--truncate-ragged-lines` switch to `polars open` (applies to CSV files).
This commit is contained in:
Jack Wright 2024-09-27 04:54:46 -07:00 committed by GitHub
parent 5bef81a059
commit bcaef8959c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -97,6 +97,7 @@ impl PluginCommand for OpenDataFrame {
r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
Some('s') Some('s')
) )
.switch("truncate-ragged-lines", "Truncate lines that are longer than the schema. CSV file", None)
.input_output_type(Type::Any, Type::Custom("dataframe".into())) .input_output_type(Type::Any, Type::Custom("dataframe".into()))
.category(Category::Custom("dataframe".into())) .category(Category::Custom("dataframe".into()))
} }
@ -466,11 +467,11 @@ fn from_csv(
.unwrap_or(DEFAULT_INFER_SCHEMA); .unwrap_or(DEFAULT_INFER_SCHEMA);
let skip_rows: Option<usize> = call.get_flag("skip-rows")?; let skip_rows: Option<usize> = call.get_flag("skip-rows")?;
let columns: Option<Vec<String>> = call.get_flag("columns")?; let columns: Option<Vec<String>> = call.get_flag("columns")?;
let maybe_schema = call let maybe_schema = call
.get_flag("schema")? .get_flag("schema")?
.map(|schema| NuSchema::try_from(&schema)) .map(|schema| NuSchema::try_from(&schema))
.transpose()?; .transpose()?;
let truncate_ragged_lines: bool = call.has_flag("truncate-ragged-lines")?;
if !call.has_flag("eager")? { if !call.has_flag("eager")? {
let csv_reader = LazyCsvReader::new(file_path); let csv_reader = LazyCsvReader::new(file_path);
@ -496,14 +497,11 @@ fn from_csv(
} }
}; };
let csv_reader = csv_reader.with_has_header(!no_header); let csv_reader = csv_reader
.with_has_header(!no_header)
let csv_reader = match maybe_schema { .with_infer_schema_length(Some(infer_schema))
Some(schema) => csv_reader.with_schema(Some(schema.into())), .with_schema(maybe_schema.map(Into::into))
None => csv_reader, .with_truncate_ragged_lines(truncate_ragged_lines);
};
let csv_reader = csv_reader.with_infer_schema_length(Some(infer_schema));
let csv_reader = match skip_rows { let csv_reader = match skip_rows {
None => csv_reader, None => csv_reader,
@ -542,6 +540,7 @@ fn from_csv(
.unwrap_or(b','), .unwrap_or(b','),
) )
.with_encoding(CsvEncoding::LossyUtf8) .with_encoding(CsvEncoding::LossyUtf8)
.with_truncate_ragged_lines(truncate_ragged_lines)
}) })
.try_into_reader_with_file_path(Some(file_path.to_path_buf())) .try_into_reader_with_file_path(Some(file_path.to_path_buf()))
.map_err(|e| ShellError::GenericError { .map_err(|e| ShellError::GenericError {