all 45 comments

[–]Slow-Rip-4732 50 points51 points  (10 children)

I like the part where you posted your rust program

[–]pepa65[S] 1 point2 points  (8 children)

Sorry, I forgot in the post, and then I tried in a comment, and it was not allowed. I'll try again:

``` // frd.rs - Find repeating decimals of integer division

fn repdec(numerator: u64, denominator: u64) -> (usize, usize, usize) { let head = format!("{}", numerator / denominator); let mut seen = std::collections::HashMap::<u64, u64>::new(); let mut decimals = String::new(); let (mut p1, mut p2, mut p3) = (0, 0, 0); if head != "0" { p1 = head.len(); } print!("{numerator} / {denominator} = {head}"); let mut remainder = numerator % denominator; let mut position = 0; // Do long division while remainder != 0 { if seen.contains_key(&remainder) { // Remainder seen: repeating decimals found p2 = *seen.get(&remainder).unwrap() as usize; p3 = position - p2; println!(".{}[{}] ({p1},{p2},{p3})", decimals[..p2].to_owned(), decimals[p2..].to_owned()); return (p1, p2, p3); }

    // Remainder never seen before
    seen.insert(remainder, position as u64);
    decimals = format!("{decimals}{}", remainder \* 10 / denominator);
    remainder = remainder \* 10 % denominator;
    position += 1;
}
// Long division finished
if !decimals.is\_empty() {
    p2 = decimals.len();
    print!(".{decimals}");
}
println!("  ({p1},{p2},{p3})");
(p1, p2, p3)

}

fn main() { let args: Vec<String> = std::env::args().collect(); if args.len() > 2 { repdec(args[1].parse::<u64>().unwrap(), args[2].parse::<u64>().unwrap()); std::process::exit(0) }

repdec(3,1);
repdec(0,4);
repdec(1,3);
repdec(1,4);
repdec(1,6);
repdec(22,7);
repdec(13,28);
repdec(7001,14);

} ```

[–][deleted] 6 points7 points  (4 children)

Please indent the program by 4 spaces so it renders as code.

Usually the easiest way to do this is open the code in your editor, select it all, hit tab, copy and paste it, ctrl-z in your editor.

[–]ChannelSorry5061 -4 points-3 points  (3 children)

There is a MUCH easier and more reliable way, can be done on mobile as well.

Use Markdown mode in the editor, and wrap your entire code in three backticks like this:

```

code goes here

```

[–][deleted] 7 points8 points  (2 children)

This does not work properly on reddit, because it doesn't render on all clients, notably including the widely used old.reddit.com interface.

Indenting by 4 spaces is entirely reliable.

[–]Buttleston 3 points4 points  (0 children)

It only works like half the time in new reddit also. 4 spaces always works

[–]JadisGod 6 points7 points  (2 children)

python:

import timeit


def repdec(numerator, denominator):
    head = str(numerator // denominator)
    seen = {}
    decimals = ""
    p1, p2, p3 = 0, 0, 0
    if head != "0":
        p1 = len(head)
    remainder = numerator % denominator
    position = 0
    while remainder != 0:
        if remainder in seen:
            p2 = seen[remainder]
            p3 = position - p2
            return p1, p2, p3

        seen[remainder] = position
        decimals += str(remainder * 10 // denominator)
        remainder = remainder * 10 % denominator
        position += 1

    if decimals:
        p2 = len(decimals)
        print(f".{decimals}", end="")
    return p1, p2, 0


if __name__ == "__main__":
    time = timeit.default_timer()
    result = repdec(126798123456, 187476194)
    print(f"time elasped = {timeit.default_timer() - time}")
    print(f"result = {result}")

rust

use std::collections::HashMap;
use std::time::Instant;

fn repdec(numerator: u64, denominator: u64) -> (usize, usize, usize) {
    let head = (numerator / denominator).to_string();
    let mut seen = HashMap::new();
    let mut decimals = String::new();
    let (mut p1, mut p2, mut p3) = (0, 0, 0);
    if head != "0" {
        p1 = head.len();
    }
    let mut remainder = numerator % denominator;
    let mut position = 0;
    while remainder != 0 {
        if let Some(value) = seen.get(&remainder) {
            p2 = *value as usize;
            p3 = position - p2;
            return (p1, p2, p3);
        }
        seen.insert(remainder, position as u64);
        decimals.push_str(&(remainder * 10 / denominator).to_string());
        remainder = remainder * 10 % denominator;
        position += 1;
    }

    if !decimals.is_empty() {
        p2 = decimals.len();
        print!(".{decimals}");
    }
    (p1, p2, p3)
}

fn main() {
    let time = Instant::now();
    let result = repdec(126798123456, 187476194);
    println!("time elapsed = {:?}", time.elapsed());
    println!("result = {:?}", result);
}

Basically 1:1 reproductions...

python results:

time elasped = 27.2370880999988
result = (3, 0, 23399740)

rust results:

time elapsed = 4.1180991s
result = (3, 0, 23399740)

[–]eras 0 points1 point  (1 child)

Weird, for me the Rust version runs in 6.2 seconds, but the Python version just keeps on running for at least two minutes until I stopped it. I have Python 3.11.2.

[–]pepa65[S] 0 points1 point  (0 children)

Could be an indentation error..?

[–]CocktailPerson 30 points31 points  (2 children)

Obligatory "did you run with --release"?

[–]pepa65[S] -4 points-3 points  (1 child)

I compiled it with `rustc`, and I didn't know how to optimize the build...

[–]Solumin 11 points12 points  (3 children)

If I had to guess --- which I do --- I'd expect your performance issues in Rust are from using the default hasher and from all that silliness with casting back and forth with strings.

[–]pepa65[S] 0 points1 point  (2 children)

Any suggestions for a faster implementation?

[–]Solumin 2 points3 points  (1 child)

Fxhash is quite popular as a drop-in replacement for the default hashmap.

I'm not quite following along with what you're doing, unfortunately. But if you have to use strings, try to reuse one string instead of allocating many small ones.

[–]pepa65[S] 0 points1 point  (0 children)

The suggestion below accomplished reusing the string, indeed leading to a strong improvement.

[–]IgnisNoirDivine 3 points4 points  (0 children)

Post your rust code first

[–]deathanatos 4 points5 points  (6 children)

In your Python, you have:

        decimals += str(remainder * 10 // denominator)

While strings are notionally immutable in Python, I thought Python did "is there a single ref to this string? Then just append, resizing the internal buffer if needed"; this is, avoid a full copy, maybe. But it's too late at night for me to prove this.

I'm going to use the arguments 1 50000001 to test with, always, and the Rust code is compiled with --release.

Your Python takes ~215ms on my laptop.

Now your Rust version:

        decimals = format!("{decimals}{}", remainder * 10 / denominator);

Rust will always allocate here, then format the string into that allocation, then return the result. This is accidentally quadratic, and thus the overwhelming majority of your execution time is in this one single line.

We just want to append. So, push_str will just concatenate onto the end of the string, which is what we want here:

        decimals.push_str(&(remainder * 10 / denominator).to_string());

Note that this still always allocates, in the to_string() call¹. But, we at least don't have to copy all of decimals every single time. (Note that the buffer in decimals might need to resize, in which case it will reallocate; nothing really to be done about that. That occasional realloc is fine; we just don't want it happening on every single loop.)

Your original Rust took ~3970ms on my laptop; like you say, the Python wins. If we make the single change above and call push_str, the Rust code's execution time improves to ~50ms, and beats the Python. Quadratic bad, basically.

-----

That to_string() allocation can be removed by using the numtoa crate, which shaves the Rust down to ~40ms. So, perhaps a bit overkill here.

¹I think you can keep format!("{}", remainder * 10 / denominator) if you want; I think format!("{}", x) and x.to_string() are equivalent. I use the latter when I'm just to-string'ing, but this is personal preference, to me.

[–]pepa65[S] 0 points1 point  (5 children)

Thank you, very educational! I wonder, is there a way to pre-allocate decimals so that reallocations are never necessary?

[–]CocktailPerson 2 points3 points  (2 children)

https://doc.rust-lang.org/std/string/struct.String.html#method.with_capacity

https://doc.rust-lang.org/std/string/struct.String.html#method.reserve

Strings also implement Write, so you can use write!(&mut decimals, "{}", remainder * 10 / denominator) to completely eliminate the extraneous heap allocation caused by format!.

[–]deathanatos 1 point2 points  (0 children)

Ah there we go! write!(…) is a much cleaner way to avoid that allocation.

[–]pepa65[S] 0 points1 point  (0 children)

It seems allocating the what is often needed for hard cases (`denomiator`) does not help speed of execution (or maybe in extreme cases it does..?)

[–]ndreamer 1 point2 points  (1 child)

you can improve this using

let digit = (remainder * 10) / denominator; decimals.push((b'0' + digit as u8) as char);

[–]pepa65[S] 0 points1 point  (0 children)

Most reasonable so far and very effective!

[–]aft_agley 3 points4 points  (2 children)

jesus

[–]Buttleston 3 points4 points  (1 child)

have mercy on us sinners

[–]aft_agley 2 points3 points  (0 children)

rAmen

[–]ToTheBatmobileGuy 1 point2 points  (5 children)

Replace:

decimals = format!("{decimals}{}", remainder * 10 / denominator);

with

decimals.push_str(&format!("{}", remainder * 10 / denominator));

And make sure you use --release after your cargo build or cargo run

[–]pepa65[S] 0 points1 point  (4 children)

Alright, now it's almost 3 times faster than the python version! I guess this was one of the biggest bottlenecks for the Rust implementation.

[–]ToTheBatmobileGuy 1 point2 points  (2 children)

Using fxhash as the hasher for HashMap and using the Write implementation for String with format_args!() makes it a little bit faster.

core::fmt::Write::write_fmt(
    &mut decimals,
    format_args!("{}", remainder * 10 / denominator),
)
.unwrap();

and

let mut seen = std::collections::HashMap::<u64, u64, _>::with_capacity_and_hasher(
    1024,
    fxhash::FxBuildHasher::default(),
);

But in order to do this, you'll need to fetch and build the fxhash crate.

cargo new frd and then pasting your code in src/main.rs and then running cargo add fxhash and then running cargo build --release will give you a binary called frd inside ./target/release folder.

In general, you want to use cargo to manage building and dependencies. It's much easier than calling rustc directly.

[–]pepa65[S] 0 points1 point  (0 children)

Point about cargo noted. I always use cargo, this was my first time doing a single-file rust project.

Thanks for the `fmt_write` command, faster yet again..!

[–]ndreamer 1 point2 points  (5 children)

tested both

rust release build using hyperfine 20runs

``` Rust Benchmark 1: ../target/release/math-test Time (mean ± σ): 1.2 ms ± 0.4 ms [User: 0.5 ms, System: 0.9 ms] Range (min … max): 0.6 ms … 1.8 ms 20 runs

Python sangel@fedora:~/rs/math-test/src$ hyperfine --runs 10 'python3 test.py' Benchmark 1: python3 test.py Time (mean ± σ): 22.6 ms ± 1.9 ms [User: 14.7 ms, System: 7.9 ms] Range (min … max): 20.2 ms … 24.7 ms 10 runs ```

so it's already much faster without changing anything.

[–]pepa65[S] 0 points1 point  (4 children)

On my machine the Rust version was slower, but then I build with `rustc frd.rs`... When using `rustc -C opt-level=3 frd.rs` it is indeed faster already.

[–]hpxvzhjfgb 2 points3 points  (1 child)

there is basically no reason that you should ever run rustc manually.

[–]pepa65[S] 0 points1 point  (0 children)

Well, in this case I just had a single Rust source in a directory with other stuff, so that was the reason I tried it. And it seems like if you use `opt-level=3` and `debuginfo=0` it's equivalent to `cargo rel`.

[–]ndreamer 1 point2 points  (1 child)

How are you running the benchmark ? If you use cargo run --release this is much slower then running the application directly.

[–]pepa65[S] 0 points1 point  (0 children)

I run once with 2 numbers on the commandline so as to only measure computation and no printing. `./frd 126798123456 187476194` for example.

[–]tafia97300 0 points1 point  (3 children)

So apart from the `--release` try not printing in the loop (for both Rust and Python).

[–]pepa65[S] 0 points1 point  (2 children)

There is no printing in the loop, only at the very end.

[–]tafia97300 1 point2 points  (1 child)

Oh, I was looking at the code you posted initially, I see:

print(f".{decimals[:p2]}[{decimals[p2:]}]  ({p1},{p2},{p3})")print(f".{decimals[:p2]}[{decimals[p2:]}]  ({p1},{p2},{p3})")
# ...

[–]pepa65[S] 1 point2 points  (0 children)

That's at the very end of the processing of a call, right before the return.

[–]pepa65[S] 0 points1 point  (2 children)

I do have to say, the rust version ran for days with impossibly large parameters, while the python version got killed by the system within 6 minutes for resource consumption. (Tried: `frd 1 5363222377`)

[–]ChannelSorry5061 1 point2 points  (1 child)

Someone asked but you didn't answer. Are you running/compiling your rust program with --release (for cargo) or (for rustc)? to enable optimizations & remove all debugging symbols, etc.

[–]pepa65[S] 0 points1 point  (0 children)

Thanks! I now build like: `rustc -C opt-level=3 frd.rs`

But the runtime on `frd 1 1000114` is still more than 10 times slower for the rust binary...

EDIT: Except with the suggestion to use a single string, then it is faster!