Let's try testing for "is not quadratic" condition

This commit is contained in:
Aleksey Kladov 2021-04-06 21:59:47 +03:00
parent 00cdbceb9d
commit e012efca27
3 changed files with 100 additions and 1 deletions

View file

@ -76,7 +76,7 @@ jobs:
run: cargo test --no-run --locked
- name: Test
run: cargo test
run: cargo test -- --nocapture
- name: Prepare cache
run: cargo xtask pre-cache

View file

@ -1,5 +1,8 @@
use std::time::Instant;
use expect_test::{expect_file, ExpectFile};
use ide_db::SymbolKind;
use stdx::format_to;
use test_utils::{bench, bench_fixture, skip_slow_tests};
use crate::{fixture, FileRange, HlTag, TextRange};
@ -257,6 +260,99 @@ fn benchmark_syntax_highlighting_long_struct() {
assert_eq!(hash, 2001);
}
/// Checks that syntax highlighting time grows linearly with the input size.
///
/// Highlights `big_struct_n` fixtures for `n` = 64, 128, ..., 1024, records an
/// `(n, elapsed milliseconds)` pair for each run and hands the measurements to
/// `assert_linear`. Returns early when `skip_slow_tests()` is true.
#[test]
fn syntax_highlighting_not_quadratic() {
    if skip_slow_tests() {
        return;
    }

    let mut measures = Vec::new();
    for i in 6..=10 {
        let n = 1 << i;
        let fixture = bench_fixture::big_struct_n(n);
        let (analysis, file_id) = fixture::file(&fixture);

        let time = Instant::now();
        let struct_count = analysis
            .highlight(file_id)
            .unwrap()
            .iter()
            .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct))
            .count();
        // Stop the clock right after the measured work, so that the sanity
        // check below is not part of the measurement.
        let elapsed = time.elapsed();

        // Sanity check: the highlighter actually produced results for this input.
        assert!(struct_count > n as usize);

        // `as_millis` truncates to whole milliseconds, which throws away a
        // large share of the signal for the small inputs; keep the fractional
        // part instead. The unit is still milliseconds.
        measures.push((n as f64, elapsed.as_secs_f64() * 1000.0));
    }

    assert_linear(&measures)
}
/// Checks that a set of measurements looks like a linear function rather than
/// like a quadratic function. Algorithm:
///
/// 1. Linearly scale the input by dividing xs and ys by their respective
///    maximum, so that the largest x and the largest y are both 1.
/// 2. Using linear regression, compute the best linear function approximating
///    the input.
/// 3. Compute RMSE and maximal absolute error.
/// 4. Check that errors are within tolerances and that the constant term is not
///    too negative.
///
/// Ideally, we should use a proper "model selection" to directly compare
/// quadratic and linear models, but that sounds rather complicated:
///
/// https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data
///
/// # Panics
///
/// Panics if fewer than two measurements are given, if a measurement is NaN,
/// if the largest x or the largest y is zero, or if the fitted line is outside
/// of the tolerances (in which case the message contains the fitted model and
/// a table of the normalized points).
fn assert_linear(xy: &[(f64, f64)]) {
    // Least squares needs at least two points: with a single one the slope is
    // 0/0, and the final check would fail with a misleading "Looks quadratic".
    assert!(xy.len() >= 2, "\nNeed at least two measurements to fit a line, got {}", xy.len());

    let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip();
    normalize(&mut xs);
    normalize(&mut ys);
    let xy = xs.iter().copied().zip(ys.iter().copied());

    // Linear regression: finding a and b to fit y = a + b*x.
    let mean_x = mean(&xs);
    let mean_y = mean(&ys);

    let b = {
        let mut num = 0.0;
        let mut denom = 0.0;
        for (x, y) in xy.clone() {
            num += (x - mean_x) * (y - mean_y);
            denom += (x - mean_x).powi(2);
        }
        num / denom
    };
    let a = mean_y - b * mean_x;

    // The table is only shown when the assertion below fails.
    let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx y y_pred\n", a, b);

    let mut se = 0.0;
    let mut max_error = 0.0f64;
    for (x, y) in xy {
        let y_pred = a + b * x;
        se += (y - y_pred).powi(2);
        max_error = max_error.max((y_pred - y).abs());

        format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred);
    }

    let rmse = (se / xs.len() as f64).sqrt();
    format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error);

    // A quadratic curve bends away from the best-fit line (large errors) and
    // pushes the intercept of that line below zero.
    assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot);

    /// Divides every element by the largest one, so that the maximum becomes 1.
    fn normalize(xs: &mut [f64]) {
        let max = xs
            .iter()
            .copied()
            .max_by(|a, b| a.partial_cmp(b).expect("measurements must not be NaN"))
            .expect("measurements must not be empty");
        // Dividing by zero would turn the data into NaN/inf and surface later
        // as a bogus "Looks quadratic" failure.
        assert!(max != 0.0, "\nCannot normalize measurements whose maximum is zero");
        xs.iter_mut().for_each(|it| *it /= max);
    }

    /// Arithmetic mean of `xs`.
    fn mean(xs: &[f64]) -> f64 {
        xs.iter().copied().sum::<f64>() / (xs.len() as f64)
    }
}
#[test]
fn benchmark_syntax_highlighting_parser() {
if skip_slow_tests() {

View file

@ -8,7 +8,10 @@ use crate::project_root;
/// Returns the fixture produced by `big_struct_n` for `n = 1_000`.
pub fn big_struct() -> String {
    const DEFAULT_N: u32 = 1_000;
    big_struct_n(DEFAULT_N)
}
pub fn big_struct_n(n: u32) -> String {
let mut buf = "pub struct RegisterBlock {".to_string();
for i in 0..n {
format_to!(buf, " /// Doc comment for {}.\n", i);