From 767b85077c88506acb73f1ae302d20f7de064975 Mon Sep 17 00:00:00 2001 From: Michael Mikovsky <77305074+Astatin3@users.noreply.github.com> Date: Sun, 11 May 2025 21:50:51 -0600 Subject: [PATCH] Add "bloom" and file scan --- src/database.rs | 33 +++++ src/main.rs | 337 ++++++++++++++++++++++++++++-------------- src/parse_ip_range.rs | 35 +++++ tools/read_ips.py | 58 ++++++++ 4 files changed, 353 insertions(+), 110 deletions(-) create mode 100644 tools/read_ips.py diff --git a/src/database.rs b/src/database.rs index 7602092..afbb3e8 100644 --- a/src/database.rs +++ b/src/database.rs @@ -6,6 +6,7 @@ use std::{ }; use chrono::{DateTime, Utc}; +use rand::seq::IteratorRandom; use regex::Regex; use rocksdb::{Cache, ColumnFamily, DB, IteratorMode, Options, WriteBatch}; use serde::{Deserialize, Serialize}; @@ -450,6 +451,38 @@ impl ResultDatabase { Ok(matching_keys) } + pub fn get_random_result(&self) -> Result { + let db = Arc::new(DB::open_cf(&self.options, &self.path, &self.columns)?); + let cfs = vec![ + db.cf_handle(&self.columns[0]).unwrap(), + db.cf_handle(&self.columns[1]).unwrap(), + db.cf_handle(&self.columns[2]).unwrap(), + db.cf_handle(&self.columns[3]).unwrap(), + db.cf_handle(&self.columns[4]).unwrap(), + db.cf_handle(&self.columns[5]).unwrap(), + db.cf_handle(&self.columns[6]).unwrap(), + db.cf_handle(&self.columns[7]).unwrap(), + db.cf_handle(&self.columns[8]).unwrap(), + db.cf_handle(&self.columns[9]).unwrap(), + db.cf_handle(&self.columns[10]).unwrap(), + db.cf_handle(&self.columns[11]).unwrap(), + db.cf_handle(&self.columns[12]).unwrap(), + ]; + + let iter = db.iterator_cf(cfs[0], IteratorMode::Start); + let (key_bytes, _value_bytes) = iter + .choose(&mut rand::rng()) + .expect("Failed to aquire random") + .expect("Failed to aquire random"); + + let key_str = std::str::from_utf8(&key_bytes).expect("Failed to parse key str"); + let row = self + .fetch_row(&db, key_str, &cfs) + .expect("Failed to fetch row"); + + Ok(row) + } + pub fn 
search_substring_in_column_regex( &self, column: &str, diff --git a/src/main.rs b/src/main.rs index 938d826..2de0a2d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ mod ports; +use lazy_static::lazy_static; use std::{ - cmp::min, + cmp::{max, min}, mem, net::{IpAddr, Ipv4Addr}, str::FromStr, @@ -10,17 +11,46 @@ use std::{ use clap::{Parser, Subcommand}; use parse_ip_range::parse_ip_targets; -use rand::{rng, seq::SliceRandom}; +use rand::{ + rng, + seq::{IteratorRandom, SliceRandom}, +}; use untitled::{ database::ResultDatabase, online_scan, - parse_ip_range::{self, generate_random_ipv4_addresses}, + parse_ip_range::{self, extract_ipv4_from_file, generate_random_ipv4_addresses}, port_scan::tcp_scan, query, service_scan::service_scan::scan_services, }; -const HILBERT_VIS_SIZE: usize = 256; +const EXCLUDE_IPS: &'static [&'static str] = &[ + "0.0.0.0/8", + "10.0.0.0/8", + "100.64.0.0/10", + "127.0.0.0/8", + "169.254.0.0/16", + "172.16.0.0/12", + "192.0.0.0/24", + "192.0.0.0/29", + "192.0.0.170/32", + "192.0.0.171/32", + "192.0.2.0/24", + "192.88.99.0/24", + "192.168.0.0/16", + "198.18.0.0/15", + "198.51.100.0/24", + "203.0.113.0/24", + "240.0.0.0/4", + "255.255.255.255/32", + "131.215.0.0/16", + "134.4.0.0/16", + "192.12.19.0/24", + "192.31.43.0/24", + "192.41.208.0/24", + "192.43.243.0/24", + "192.54.249.0/24", +]; /// A fictional versioning CLI #[derive(Debug, Parser)] @@ -36,6 +66,25 @@ enum Commands { /// Scans servers #[command(arg_required_else_help = true)] Scan { + #[command(subcommand)] + command: ScanCommands, + }, + /// Retrieves queries + #[command(arg_required_else_help = true)] + Search { + /// The search query + query: Vec, + /// Select N random results + #[arg(short, long, default_value_t = 0)] + random: usize, + }, +} + +#[derive(Debug, Subcommand, Clone)] +enum ScanCommands { + /// Scans list of servers + #[command(arg_required_else_help = true)] + List { /// List of remote servers hosts: String, @@ -55,11 +104,26 @@ enum Commands { 
#[arg(short, long, default_value_t = 100)] syn_tcp_delay_micros: u64, }, - /// Retrieves queries + /// Scans ips from file #[command(arg_required_else_help = true)] - Search { - /// The search query - query: Vec, + File { + /// List of remote servers + path: String, + /// Size of block of IPs to scan + #[arg(short, long, default_value_t = 4096)] + batch_size: usize, + /// The top N most common ports to scan + #[arg(short, long, default_value_t = 150)] + n_ports: usize, + /// Timeout for requests + #[arg(short, long, default_value_t = 3000)] + timeout_ms: u64, + /// Delay between icmp echo requests + #[arg(short, long, default_value_t = 80)] + ping_delay_micros: u64, + /// Delay between tcp syn packets + #[arg(short, long, default_value_t = 100)] + syn_tcp_delay_micros: u64, }, /// Rescans servers from search query Rescan { @@ -82,7 +146,7 @@ enum Commands { #[arg(short, long, default_value_t = 100)] syn_tcp_delay_micros: u64, }, - /// Scans random services + /// Continuously scans random ips Random { /// Size of block of IPs to scan #[arg(short, long, default_value_t = 4096)] @@ -100,6 +164,27 @@ enum Commands { #[arg(short, long, default_value_t = 100)] syn_tcp_delay_micros: u64, }, + /// Continuously scans blocks of ips around pre-scanned ips. 
+ Bloom { + /// The amount of bits to include in cidr, 192.168.0.0/X + #[arg(short, long, default_value_t = 24)] + bits: usize, + /// Size of block of IPs to scan + #[arg(short, long, default_value_t = 4096)] + batch_size: usize, + /// The top N most common ports to scan + #[arg(short, long, default_value_t = 150)] + n_ports: usize, + /// Timeout for requests + #[arg(short, long, default_value_t = 3000)] + timeout_ms: u64, + /// Delay between icmp echo requests + #[arg(short, long, default_value_t = 80)] + ping_delay_micros: u64, + /// Delay between tcp syn packets + #[arg(short, long, default_value_t = 100)] + syn_tcp_delay_micros: u64, + }, } fn main() -> Result<(), Box> { @@ -107,128 +192,160 @@ fn main() -> Result<(), Box> { let database = ResultDatabase::new("ping_result_database"); match args.command { - Commands::Scan { - hosts, - batch_size, - n_ports, - timeout_ms, - ping_delay_micros, - syn_tcp_delay_micros, - } => { - let hosts = parse_ip_targets(&hosts)?; - - scan( - batch_size, - &database, - hosts, - ports::PORTS[0..n_ports].to_vec(), - Duration::from_millis(timeout_ms), - Duration::from_micros(ping_delay_micros), - Duration::from_micros(syn_tcp_delay_micros), - )?; - } - Commands::Rescan { - query, - batch_size, - n_ports, - timeout_ms, - ping_delay_micros, - syn_tcp_delay_micros, - } => { - let start = Instant::now(); - if let Ok(query) = query::search(query) { - let results = database.search(query); - if let Ok(results) = results { - let len = results.len(); - - let mut hosts: Vec = Vec::new(); - - for result in results { - println!("{}", result.to_string()); - hosts.push(IpAddr::from_str(result.ip.as_str()).unwrap()); - } - println!("{} results in {}ms", len, start.elapsed().as_millis()); - - hosts.sort(); - hosts.dedup(); - hosts.shuffle(&mut rng()); + Commands::Scan { command } => { + match command { + ScanCommands::List { + hosts, + batch_size, + n_ports, + timeout_ms, + ping_delay_micros, + syn_tcp_delay_micros, + } => { + let hosts = 
parse_ip_targets(&hosts)?; scan( batch_size, &database, hosts, ports::PORTS[0..n_ports].to_vec(), - // (1..65535).collect(), Duration::from_millis(timeout_ms), Duration::from_micros(ping_delay_micros), Duration::from_micros(syn_tcp_delay_micros), )?; } + ScanCommands::File { + path, + batch_size, + n_ports, + timeout_ms, + ping_delay_micros, + syn_tcp_delay_micros, + } => { + let hosts = extract_ipv4_from_file(&path)?; + + scan( + batch_size, + &database, + hosts, + ports::PORTS[0..n_ports].to_vec(), + Duration::from_millis(timeout_ms), + Duration::from_micros(ping_delay_micros), + Duration::from_micros(syn_tcp_delay_micros), + )?; + } + ScanCommands::Rescan { + query, + batch_size, + n_ports, + timeout_ms, + ping_delay_micros, + syn_tcp_delay_micros, + } => { + let start = Instant::now(); + if let Ok(query) = query::search(query) { + let results = database.search(query); + if let Ok(results) = results { + let len = results.len(); + + let mut hosts: Vec = Vec::new(); + + for result in results { + println!("{}", result.to_string()); + hosts.push(IpAddr::from_str(result.ip.as_str()).unwrap()); + } + println!("{} results in {}ms", len, start.elapsed().as_millis()); + + hosts.sort(); + hosts.dedup(); + hosts.shuffle(&mut rng()); + + scan( + batch_size, + &database, + hosts, + ports::PORTS[0..n_ports].to_vec(), + // (1..65535).collect(), + Duration::from_millis(timeout_ms), + Duration::from_micros(ping_delay_micros), + Duration::from_micros(syn_tcp_delay_micros), + )?; + } + } + } + ScanCommands::Random { + batch_size, + n_ports, + timeout_ms, + ping_delay_micros, + syn_tcp_delay_micros, + } => loop { + let hosts = generate_random_ipv4_addresses(batch_size, EXCLUDE_IPS.to_vec()); + + scan( + batch_size, + &database, + hosts, + ports::PORTS[0..n_ports].to_vec(), + Duration::from_millis(timeout_ms), + Duration::from_micros(ping_delay_micros), + Duration::from_micros(syn_tcp_delay_micros), + )?; + }, + ScanCommands::Bloom { + bits, + batch_size, + n_ports, + timeout_ms, + 
ping_delay_micros, + syn_tcp_delay_micros, + } => loop { + let host = database + .get_random_result() + .expect("Failed to get random host"); + let hosts = + parse_ip_range::parse_ip_targets(&(host.ip + "/" + &bits.to_string())) + .expect("Failed to parse ip range"); + + scan( + batch_size, + &database, + hosts, + ports::PORTS[0..n_ports].to_vec(), + Duration::from_millis(timeout_ms), + Duration::from_micros(ping_delay_micros), + Duration::from_micros(syn_tcp_delay_micros), + )?; + }, } } - Commands::Search { query } => { + Commands::Search { query, random } => { let start = Instant::now(); if let Ok(query) = query::search(query) { let results = database.search(query); if let Ok(results) = results { - let len = results.len(); - - for result in results { - println!("{}", result.to_string()); + let total_len = results.len(); + if random != 0 { + let local_len = min(random, total_len); + let results = results.iter().choose_multiple(&mut rng(), local_len); + for result in results { + println!("{}", result.to_string()); + } + println!( + "{} results in {}ms, selected {}", + total_len, + start.elapsed().as_millis(), + local_len + ); + } else { + for result in results { + println!("{}", result.to_string()); + } + println!("{} results in {}ms", total_len, start.elapsed().as_millis()); } - println!("{} results in {}ms", len, start.elapsed().as_millis()); } } } - Commands::Random { - batch_size, - n_ports, - timeout_ms, - ping_delay_micros, - syn_tcp_delay_micros, - } => loop { - let hosts = generate_random_ipv4_addresses( - batch_size, - vec![ - "0.0.0.0/8", - "10.0.0.0/8", - "100.64.0.0/10", - "127.0.0.0/8", - "169.254.0.0/16", - "172.16.0.0/12", - "192.0.0.0/24", - "192.0.0.0/29", - "192.0.0.170/32", - "192.0.0.171/32", - "192.0.2.0/24", - "192.88.99.0/24", - "192.168.0.0/16", - "198.18.0.0/15", - "198.51.100.0/24", - "203.0.113.0/24", - "240.0.0.0/4", - "255.255.255.255/32", - "131.215.0.0/16", - "134.4.0.0/16", - "192.12.19.0/24", - "192.31.43.0/24", - 
"192.41.208.0/24", - "192.43.243.0/24", - "192.54.249.0/24", - ], - ); - - scan( - batch_size, - &database, - hosts, - ports::PORTS[0..n_ports].to_vec(), - Duration::from_millis(timeout_ms), - Duration::from_micros(ping_delay_micros), - Duration::from_micros(syn_tcp_delay_micros), - )?; - }, - _ => {} } Ok(()) diff --git a/src/parse_ip_range.rs b/src/parse_ip_range.rs index ca34055..4c76c86 100644 --- a/src/parse_ip_range.rs +++ b/src/parse_ip_range.rs @@ -1,10 +1,15 @@ use std::{ + error::Error, + fs::File, + io::{BufRead, BufReader}, net::{IpAddr, Ipv4Addr}, + path::Path, str::FromStr, }; use pnet::ipnetwork::IpNetwork; use rand::{Rng, rng, seq::SliceRandom}; +use regex::Regex; // static MAX_HOSTS: u32 = 1024; @@ -156,3 +161,33 @@ pub fn generate_random_ipv4_addresses(count: usize, excluded_cidrs: Vec<&str>) - result } + +pub fn extract_ipv4_from_file>(filename: P) -> Result, Box> { + // Open the file + let file = File::open(filename)?; + let reader = BufReader::new(file); + + // IPv4 regex pattern: matches numbers 0-255 separated by periods + let ipv4_pattern = r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b"; + let re = Regex::new(ipv4_pattern).expect("Invalid regex pattern"); + + let mut ip_addresses = Vec::new(); + + // Process each line in the file + for line in reader.lines() { + let line = line?; + + // Find all matches in the current line + for ip_str in re.find_iter(&line) { + // Parse the found string as an IpAddr + if let Ok(ip) = ip_str.as_str().parse::() { + // Only collect IPv4 addresses (though our regex already ensures this) + if ip.is_ipv4() { + ip_addresses.push(ip); + } + } + } + } + + Ok(ip_addresses) +} diff --git a/tools/read_ips.py b/tools/read_ips.py new file mode 100644 index 0000000..2dfa9ae --- /dev/null +++ b/tools/read_ips.py @@ -0,0 +1,58 @@ +import struct +import socket +import sys + +def parse_binary_file(input_file, output_file): + """ + Parse a binary file containing IPv4 addresses 
and ports. + - First 4 bytes: IPv4 address + - Bytes 5-6: Port number in big endian (network byte order) + + Args: + input_file (str): Path to the binary input file + output_file (str): Path to the text output file + """ + try: + with open(input_file, 'rb') as f_in, open(output_file, 'w') as f_out: + # Read records until end of file + while True: + # Read 6 bytes (4 for IP, 2 for port) + record = f_in.read(6) + + # Break if EOF or incomplete record + if not record or len(record) < 6: + break + + # Extract IP (first 4 bytes) + ip_bytes = record[0:4] + ip_str = socket.inet_ntoa(ip_bytes) + + # Extract port (next 2 bytes) - big endian, per the '>H' format + port = struct.unpack('>H', record[4:6])[0] + + # Write to output file + output_line = f"{ip_str}:{port}\n" + f_out.write(output_line) + + print(f"Parsing complete. Results written to {output_file}") + + except FileNotFoundError: + print(f"Error: Input file '{input_file}' not found.") + return + except IOError as e: + print(f"I/O error: {e}") + return + except Exception as e: + print(f"Unexpected error: {e}") + return + +if __name__ == "__main__": + # Check command line arguments + if len(sys.argv) != 3: + print("Usage: python script.py input_binary_file output_text_file") + sys.exit(1) + + input_file = sys.argv[1] + output_file = sys.argv[2] + + parse_binary_file(input_file, output_file)