Skip to content

GraphemeCursor::next_boundary() returns incorrect boundary when chunk starts with ZWJ #118

Open
@sarah-quinones

Description

@sarah-quinones

https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=8fe7258129406b137529559157edd542

use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};

/// Reproduction: walks a `GraphemeCursor` over the text "👩‍🔬" when it is
/// delivered as two chunks and the second chunk begins with U+200D (ZWJ).
///
/// The report expects the first boundary after offset 0 to be byte 11 (the
/// end of the whole string), whether the cursor answers directly or first
/// asks for pre-context.
#[test]
fn test_graphemes_chunked() {
    // First chunk: "👩" alone, 4 bytes of UTF-8.
    let head = "👩";
    // Second chunk: ZWJ (3 bytes) followed by "🔬" (4 bytes).
    let tail = "\u{200d}🔬";
    // The second chunk starts where the first one ends in the full text.
    let tail_start = head.len();
    // Byte length of the concatenated text "👩‍🔬" (4 + 3 + 4 = 11).
    let total_len = head.len() + tail.len();

    let mut cursor = GraphemeCursor::new(0, total_len, true);

    // The first chunk alone is not enough to decide; the cursor must ask for more.
    let first_step = cursor.next_boundary(head, 0);
    assert_eq!(first_step, Err(GraphemeIncomplete::NextChunk));

    let second_step = cursor.next_boundary(tail, tail_start);
    match second_step {
        Ok(boundary) => assert_eq!(boundary, Some(11)),
        Err(GraphemeIncomplete::PreContext(_)) => {
            // The cursor wants to look at text before the current chunk:
            // hand back the first chunk, then retry on the second one.
            cursor.provide_context(head, 0);
            let retry = cursor.next_boundary(tail, tail_start);
            assert_eq!(retry, Ok(Some(11)));
        }
        _ => unreachable!(),
    }
}

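The last assert panics: after the pre-context has been provided, `next_boundary` returns `Ok(Some(7))` (a boundary directly after the ZWJ), when it should instead return `Ok(Some(11))`, the end of the full 👩‍🔬 sequence.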

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions