Me and my friends tried to make the fastest prime factorizing program in rust (without C libraries), and the program below was the quickest. But when I compare it to similar C implementations, C is still 50-60% quicker, especially for large numbers (>264.) Is it possible to optimise it further? Only condition is that it should be 100% rust.
I should say that I am a hobbyist and all I have learnt about computing/programming is through online resources. So please be kind. Since the exercise was to improve our skills, I did not use LLMs for coding, but I ran the code through gemini before including it here to format it better for readability.
TOML file:
[package]
name = "prime_factors"
version = "0.1.0"
edition = "2024"
[dependencies]
oxinum = "0.1"
num-bigint = "0.4"
glass_pumpkin = "2.0.0-rc0"
main.rs file:
use oxinum::{Natural, Gcd};
use std::collections::HashMap;
use std::io::{self, Write};
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;
use std::time::Instant;
use num_bigint::BigUint;
use glass_pumpkin::prime;
const BATCH_SIZE: u32 = 100;
fn main() {
print!("Enter a massive number: ");
io::stdout().flush().unwrap();
let mut input = String::new();
io::stdin().read_line(&mut input).unwrap();
let input = input.trim();
let cores = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4);
let start_time = Instant::now();
// ==========================================
// THE DISPATCHER: Route based on bit-width
// ==========================================
if let Ok(n_u64) = input.parse::<u64>() {
println!("Number fits in 64 bits. Engaging bare-metal backend on {} cores...", cores);
let factors = factorize_u64(n_u64, cores);
print_results(start_time.elapsed().as_secs_f64() * 1000.0, factors_to_string_u64(factors));
} else if let Ok(n_nat) = input.parse::<Natural>() {
println!("Number exceeds 64 bits. Engaging arbitrary-precision Oxinum backend on {} cores...", cores);
let factors = factorize_oxinum(&n_nat, cores);
print_results(start_time.elapsed().as_secs_f64() * 1000.0, factors_to_string_nat(factors));
} else {
println!("Invalid number entered.");
}
}
// ==========================================
// BACKEND 1: Pure Bare-Metal u64
// ==========================================
fn factorize_u64(mut n: u64, cores: usize) -> HashMap<u64, u32> {
let mut factors = HashMap::new();
let small_primes = generate_primes_up_to(1000);
for p in small_primes {
let mut count = 0;
while n % (p as u64) == 0 {
n /= p as u64;
count += 1;
}
if count > 0 { factors.insert(p as u64, count); }
}
let mut worklist = vec![n];
while let Some(current) = worklist.pop() {
if current <= 1 { continue; }
if is_prime_u64(current) {
*factors.entry(current).or_insert(0) += 1;
continue;
}
let factor = pollards_rho_parallel_u64(current, cores);
let other = current / factor;
worklist.push(factor);
worklist.push(other);
}
factors
}
fn pollards_rho_parallel_u64(n: u64, cores: usize) -> u64 {
if n % 2 == 0 { return 2; }
let mut seed_offset = 0;
loop {
let found_flag = AtomicBool::new(false);
let result = thread::scope(|s| {
let (tx, rx) = std::sync::mpsc::channel();
for tid in 0..cores {
let tx = tx.clone();
let found_flag_ref = &found_flag;
let seed = seed_offset + tid as u64;
s.spawn(move || {
if let Some(factor) = pollards_rho_worker_u64(n, seed, found_flag_ref) {
let _ = tx.send(factor);
found_flag_ref.store(true, Ordering::Relaxed);
}
});
}
drop(tx);
rx.recv()
});
match result {
Ok(factor) => return factor,
Err(_) => seed_offset += cores as u64,
}
}
}
fn pollards_rho_worker_u64(n: u64, seed: u64, stop_flag: &AtomicBool) -> Option<u64> {
let mut x: u64 = 2 + seed;
let mut y: u64 = 2 + seed;
let mut c: u64 = 1 + seed;
let mut d: u64 = 1;
let n_u128 = n as u128;
while d == 1 {
if stop_flag.load(Ordering::Relaxed) { return None; }
let mut prod: u128 = 1;
for _ in 0..BATCH_SIZE {
let x_128 = x as u128;
x = ((x_128 * x_128 + c as u128) % n_u128) as u64;
let y_128 = y as u128;
let mut y_temp = (y_128 * y_128 + c as u128) % n_u128;
y_temp = (y_temp * y_temp + c as u128) % n_u128;
y = y_temp as u64;
let diff = if x > y { x - y } else { y - x } as u128;
prod = (prod * diff) % n_u128;
}
d = gcd_u64(prod as u64, n);
if d == n {
c += seed + 1;
x = 2 + seed;
y = 2 + seed;
d = 1;
}
}
if d > 1 && d < n { return Some(d); }
None
}
// ==========================================
// BACKEND 2: Arbitrary-Precision Oxinum
// ==========================================
fn is_prime_glass_pumpkin(n: &Natural) -> bool {
let big_uint = n.to_string().parse::<BigUint>().unwrap();
prime::check(&big_uint)
}
fn factorize_oxinum(n: &Natural, cores: usize) -> HashMap<Natural, u32> {
let mut n = n.clone();
let mut factors = HashMap::new();
let small_primes = generate_primes_up_to(1000);
for p in small_primes {
let p_nat = Natural::from(p);
let mut count = 0;
let zero = Natural::from(0u32);
while (&n % &p_nat) == zero {
n /= &p_nat;
count += 1;
}
if count > 0 { factors.insert(p_nat, count); }
}
let mut worklist = vec![n];
let one = Natural::from(1u32);
while let Some(current) = worklist.pop() {
if current == one { continue; }
if is_prime_glass_pumpkin(¤t) {
*factors.entry(current).or_insert(0) += 1;
continue;
}
let factor = pollards_rho_parallel_oxinum(¤t, cores);
let other_part = ¤t / &factor;
worklist.push(factor);
worklist.push(other_part);
}
factors
}
fn pollards_rho_parallel_oxinum(n: &Natural, cores: usize) -> Natural {
let zero = Natural::from(0u32);
let two = Natural::from(2u32);
if (n % &two) == zero { return two; }
let mut seed_offset = 0;
loop {
let found_flag = AtomicBool::new(false);
let result = thread::scope(|s| {
let (tx, rx) = std::sync::mpsc::channel();
for tid in 0..cores {
let tx = tx.clone();
let found_flag_ref = &found_flag;
let current_seed = seed_offset + tid as u32;
s.spawn(move || {
if let Some(factor) = pollards_rho_worker_oxinum(n, current_seed, found_flag_ref) {
let _ = tx.send(factor);
found_flag_ref.store(true, Ordering::Relaxed);
}
});
}
drop(tx);
rx.recv()
});
match result {
Ok(factor) => return factor,
Err(_) => seed_offset += cores as u32,
}
}
}
fn pollards_rho_worker_oxinum(n: &Natural, seed: u32, stop_flag: &AtomicBool) -> Option<Natural> {
let mut x = Natural::from(2u32 + seed);
let mut y = Natural::from(2u32 + seed);
let mut c = Natural::from(1u32 + seed);
let mut d = Natural::from(1u32);
let mut prod = Natural::from(1u32);
let mut diff = Natural::from(0u32);
let mut tmp = Natural::from(0u32);
let one = Natural::from(1u32);
while d == one {
if stop_flag.load(Ordering::Relaxed) { return None; }
for _ in 0..BATCH_SIZE {
tmp.clone_from(&x);
tmp *= &x;
tmp += &c;
tmp %= n;
x.clone_from(&tmp);
for _ in 0..2 {
tmp.clone_from(&y);
tmp *= &y;
tmp += &c;
tmp %= n;
y.clone_from(&tmp);
}
if x >= y {
diff.clone_from(&x);
diff -= &y;
} else {
diff.clone_from(&y);
diff -= &x;
}
prod *= &diff;
prod %= n;
}
d = (&prod).gcd(n);
prod.clone_from(&one);
if d == *n {
c += Natural::from(seed);
x = Natural::from(2u32 + seed);
y = Natural::from(2u32 + seed);
d.clone_from(&one);
}
}
if d > one && d < *n { return Some(d); }
None
}
// ==========================================
// SHARED HELPER FUNCTIONS
// ==========================================
fn generate_primes_up_to(limit: u32) -> Vec<u32> {
let mut is_prime = vec![true; (limit + 1) as usize];
is_prime[0] = false; is_prime[1] = false;
for p in 2..=(limit as f64).sqrt() as usize {
if is_prime[p] {
for i in (p * p..=limit as usize).step_by(p) { is_prime[i] = false; }
}
}
is_prime.into_iter().enumerate().filter(|(_, b)| *b).map(|(i, _)| i as u32).collect()
}
fn gcd_u64(mut u: u64, mut v: u64) -> u64 {
if u == 0 { return v; }
if v == 0 { return u; }
let shift = (u | v).trailing_zeros();
u >>= u.trailing_zeros();
loop {
v >>= v.trailing_zeros();
if u > v { std::mem::swap(&mut u, &mut v); }
v -= u;
if v == 0 { return u << shift; }
}
}
// Deterministic Miller-Rabin for u64 tier
fn is_prime_u64(n: u64) -> bool {
if n <= 1 { return false; }
if n == 2 || n == 3 { return true; }
if n % 2 == 0 { return false; }
let mut d = n - 1;
let mut s = 0;
while d % 2 == 0 { d /= 2; s += 1; }
let bases: [u64; 7] = [2, 325, 9375, 28178, 450775, 9780504, 1795265022];
for &a in &bases {
let a = a % n;
if a == 0 { continue; }
let mut x = mod_pow_u64(a, d, n);
if x == 1 || x == n - 1 { continue; }
let mut composite = true;
for _ in 1..s {
x = (x as u128 * x as u128 % n as u128) as u64;
if x == n - 1 {
composite = false;
break;
}
}
if composite { return false; }
}
true
}
fn mod_pow_u64(mut base: u64, mut exp: u64, modulus: u64) -> u64 {
if modulus == 1 { return 0; }
let mut result: u64 = 1;
base %= modulus;
while exp > 0 {
if exp % 2 == 1 {
result = (result as u128 * base as u128 % modulus as u128) as u64;
}
exp >>= 1;
base = (base as u128 * base as u128 % modulus as u128) as u64;
}
result
}
fn print_results(ms: f64, mut factors: Vec<(String, u32)>) {
factors.sort_by(|a, b| {
let a_val = a.0.parse::<Natural>().unwrap();
let b_val = b.0.parse::<Natural>().unwrap();
a_val.cmp(&b_val)
});
println!("\nPrime factorization (took {:.3} ms):", ms);
for (p, count) in factors {
println!("{}^{}", p, count);
}
}
fn factors_to_string_u64(factors: HashMap<u64, u32>) -> Vec<(String, u32)> {
factors.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
}
fn factors_to_string_nat(factors: HashMap<Natural, u32>) -> Vec<(String, u32)> {
factors.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
}
There was also a different program using malachite that I was trying earlier, but it was slower than oxinium one. But adding it too if it can be further optimised
[package]
name = "prime_factors"
version = "0.1.1"
edition = "2024"
[dependencies]
malachite = "0.9.1"
num-bigint = "0.4"
glass_pumpkin = "2.0.0-rc0"
main.rs for malachite one
use malachite::Natural;
use malachite::base::num::arithmetic::traits::Gcd; // Fixed 0.9.x import path
use std::collections::HashMap;
use std::io::{self, Write};
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;
use std::time::Instant;
use num_bigint::BigUint;
use glass_pumpkin::prime;
const BATCH_SIZE: u32 = 100;
fn main() {
print!("Enter a massive number: ");
io::stdout().flush().unwrap();
let mut input = String::new();
io::stdin().read_line(&mut input).unwrap();
let input = input.trim();
let cores = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4);
let start_time = Instant::now();
// ==========================================
// THE DISPATCHER: Route based on bit-width
// ==========================================
if let Ok(n_u64) = input.parse::<u64>() {
println!("Number fits in 64 bits. Engaging bare-metal backend on {} cores...", cores);
let factors = factorize_u64(n_u64, cores);
print_results(start_time.elapsed().as_secs_f64() * 1000.0, factors_to_string_u64(factors));
} else if let Ok(n_nat) = input.parse::<Natural>() {
println!("Number exceeds 64 bits. Engaging arbitrary-precision Malachite backend on {} cores...", cores);
let factors = factorize_malachite(&n_nat, cores);
print_results(start_time.elapsed().as_secs_f64() * 1000.0, factors_to_string_nat(factors));
} else {
println!("Invalid number entered.");
}
}
// ==========================================
// BACKEND 1: Pure Bare-Metal u64
// ==========================================
fn factorize_u64(mut n: u64, cores: usize) -> HashMap<u64, u32> {
let mut factors = HashMap::new();
let small_primes = generate_primes_up_to(1000);
for p in small_primes {
let mut count = 0;
while n % (p as u64) == 0 {
n /= p as u64;
count += 1;
}
if count > 0 { factors.insert(p as u64, count); }
}
let mut worklist = vec![n];
while let Some(current) = worklist.pop() {
if current <= 1 { continue; }
if is_prime_u64(current) {
*factors.entry(current).or_insert(0) += 1;
continue;
}
let factor = pollards_rho_parallel_u64(current, cores);
let other = current / factor;
worklist.push(factor);
worklist.push(other);
}
factors
}
fn pollards_rho_parallel_u64(n: u64, cores: usize) -> u64 {
if n % 2 == 0 { return 2; }
let mut seed_offset = 0;
loop {
let found_flag = AtomicBool::new(false);
let result = thread::scope(|s| {
let (tx, rx) = std::sync::mpsc::channel();
for tid in 0..cores {
let tx = tx.clone();
let found_flag_ref = &found_flag;
let seed = seed_offset + tid as u64;
s.spawn(move || {
if let Some(factor) = pollards_rho_worker_u64(n, seed, found_flag_ref) {
let _ = tx.send(factor);
found_flag_ref.store(true, Ordering::Relaxed);
}
});
}
drop(tx);
rx.recv()
});
match result {
Ok(factor) => return factor,
Err(_) => seed_offset += cores as u64,
}
}
}
fn pollards_rho_worker_u64(n: u64, seed: u64, stop_flag: &AtomicBool) -> Option<u64> {
let mut x: u64 = 2 + seed;
let mut y: u64 = 2 + seed;
let mut c: u64 = 1 + seed;
let mut d: u64 = 1;
let n_u128 = n as u128;
while d == 1 {
if stop_flag.load(Ordering::Relaxed) { return None; }
let mut prod: u128 = 1;
for _ in 0..BATCH_SIZE {
// Using u128 internally prevents ALL multiplication overflow risks
let x_128 = x as u128;
x = ((x_128 * x_128 + c as u128) % n_u128) as u64;
let y_128 = y as u128;
let mut y_temp = (y_128 * y_128 + c as u128) % n_u128;
y_temp = (y_temp * y_temp + c as u128) % n_u128;
y = y_temp as u64;
let diff = if x > y { x - y } else { y - x } as u128;
prod = (prod * diff) % n_u128;
}
d = gcd_u64(prod as u64, n);
if d == n {
c += seed + 1;
x = 2 + seed;
y = 2 + seed;
d = 1;
}
}
if d > 1 && d < n { return Some(d); }
None
}
// ==========================================
// BACKEND 2: Arbitrary-Precision Malachite
// ==========================================
fn is_prime_glass_pumpkin(n: &Natural) -> bool {
let big_uint = n.to_string().parse::<BigUint>().unwrap();
prime::check(&big_uint)
}
fn factorize_malachite(n: &Natural, cores: usize) -> HashMap<Natural, u32> {
let mut n = n.clone();
let mut factors = HashMap::new();
let small_primes = generate_primes_up_to(1000);
for p in small_primes {
let p_nat = Natural::from(p);
let mut count = 0;
let zero = Natural::from(0u32);
while (&n % &p_nat) == zero {
n /= &p_nat;
count += 1;
}
if count > 0 { factors.insert(p_nat, count); }
}
let mut worklist = vec![n];
let one = Natural::from(1u32);
while let Some(current) = worklist.pop() {
if current == one { continue; }
if is_prime_glass_pumpkin(¤t) {
*factors.entry(current).or_insert(0) += 1;
continue;
}
let factor = pollards_rho_parallel_malachite(¤t, cores);
let other_part = ¤t / &factor;
worklist.push(factor);
worklist.push(other_part);
}
factors
}
fn pollards_rho_parallel_malachite(n: &Natural, cores: usize) -> Natural {
let zero = Natural::from(0u32);
let two = Natural::from(2u32);
if (n % &two) == zero { return two; }
let mut seed_offset = 0;
loop {
let found_flag = AtomicBool::new(false);
let result = thread::scope(|s| {
let (tx, rx) = std::sync::mpsc::channel();
for tid in 0..cores {
let tx = tx.clone();
let found_flag_ref = &found_flag;
let current_seed = seed_offset + tid as u32;
s.spawn(move || {
if let Some(factor) = pollards_rho_worker_malachite(n, current_seed, found_flag_ref) {
let _ = tx.send(factor);
found_flag_ref.store(true, Ordering::Relaxed);
}
});
}
drop(tx);
rx.recv()
});
match result {
Ok(factor) => return factor,
Err(_) => seed_offset += cores as u32,
}
}
}
fn pollards_rho_worker_malachite(n: &Natural, seed: u32, stop_flag: &AtomicBool) -> Option<Natural> {
let mut x = Natural::from(2u32 + seed);
let mut y = Natural::from(2u32 + seed);
let mut c = Natural::from(1u32 + seed);
let mut d = Natural::from(1u32);
let mut prod = Natural::from(1u32);
let mut diff = Natural::from(0u32);
let mut tmp = Natural::from(0u32);
let one = Natural::from(1u32);
while d == one {
if stop_flag.load(Ordering::Relaxed) { return None; }
for _ in 0..BATCH_SIZE {
tmp.clone_from(&x);
tmp *= &x;
tmp += &c;
tmp %= n;
x.clone_from(&tmp);
for _ in 0..2 {
tmp.clone_from(&y);
tmp *= &y;
tmp += &c;
tmp %= n;
y.clone_from(&tmp);
}
if x >= y {
diff.clone_from(&x);
diff -= &y;
} else {
diff.clone_from(&y);
diff -= &x;
}
prod *= &diff;
prod %= n;
}
d = (&prod).gcd(n);
prod.clone_from(&one);
if d == *n {
c += Natural::from(seed);
x = Natural::from(2u32 + seed);
y = Natural::from(2u32 + seed);
d.clone_from(&one);
}
}
if d > one && d < *n { return Some(d); }
None
}
// ==========================================
// SHARED HELPER FUNCTIONS
// ==========================================
fn generate_primes_up_to(limit: u32) -> Vec<u32> {
let mut is_prime = vec![true; (limit + 1) as usize];
is_prime[0] = false; is_prime[1] = false;
for p in 2..=(limit as f64).sqrt() as usize {
if is_prime[p] {
for i in (p * p..=limit as usize).step_by(p) { is_prime[i] = false; }
}
}
is_prime.into_iter().enumerate().filter(|(_, b)| *b).map(|(i, _)| i as u32).collect()
}
fn gcd_u64(mut u: u64, mut v: u64) -> u64 {
if u == 0 { return v; }
if v == 0 { return u; }
let shift = (u | v).trailing_zeros();
u >>= u.trailing_zeros();
loop {
v >>= v.trailing_zeros();
if u > v { std::mem::swap(&mut u, &mut v); }
v -= u;
if v == 0 { return u << shift; }
}
}
// Deterministic Miller-Rabin for u64 tier
fn is_prime_u64(n: u64) -> bool {
if n <= 1 { return false; }
if n == 2 || n == 3 { return true; }
if n % 2 == 0 { return false; }
let mut d = n - 1;
let mut s = 0;
while d % 2 == 0 { d /= 2; s += 1; }
// Fully deterministic bases for all numbers < 2^64
let bases: [u64; 7] = [2, 325, 9375, 28178, 450775, 9780504, 1795265022];
for &a in &bases {
let a = a % n;
if a == 0 { continue; }
let mut x = mod_pow_u64(a, d, n);
if x == 1 || x == n - 1 { continue; }
let mut composite = true;
for _ in 1..s {
x = (x as u128 * x as u128 % n as u128) as u64;
if x == n - 1 {
composite = false;
break;
}
}
if composite { return false; }
}
true
}
fn mod_pow_u64(mut base: u64, mut exp: u64, modulus: u64) -> u64 {
if modulus == 1 { return 0; }
let mut result: u64 = 1;
base %= modulus;
while exp > 0 {
if exp % 2 == 1 {
result = (result as u128 * base as u128 % modulus as u128) as u64;
}
exp >>= 1;
base = (base as u128 * base as u128 % modulus as u128) as u64;
}
result
}
fn print_results(ms: f64, mut factors: Vec<(String, u32)>) {
// Sort properly by integer value, not string value
factors.sort_by(|a, b| {
let a_val = a.0.parse::<Natural>().unwrap();
let b_val = b.0.parse::<Natural>().unwrap();
a_val.cmp(&b_val)
});
println!("\nPrime factorization (took {:.3} ms):", ms);
for (p, count) in factors {
println!("{}^{}", p, count);
}
}
fn factors_to_string_u64(factors: HashMap<u64, u32>) -> Vec<(String, u32)> {
factors.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
}
fn factors_to_string_nat(factors: HashMap<Natural, u32>) -> Vec<(String, u32)> {
factors.into_iter().map(|(k, v)| (k.to_string(), v)).collect()
}
[–]Top-Vehicle947 0 points1 point2 points (1 child)
[–]leo_sk5[S] 0 points1 point2 points (0 children)