commit 74b81bf5e81819269b7b8fff2223982ba662e404
Author: Simon Bernier St-Pierre
Date:   Sun Dec 11 04:00:01 2016 -0500

    initial commit, bencode decoding

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eb5a316
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ea729d6
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "magnolia"
+version = "0.1.0"
+authors = ["Simon Bernier St-Pierre "]
+
+[dependencies]
diff --git a/examples/decode-torrent.rs b/examples/decode-torrent.rs
new file mode 100644
index 0000000..e2dc744
--- /dev/null
+++ b/examples/decode-torrent.rs
@@ -0,0 +1,24 @@
+extern crate magnolia;
+
+use std::env;
+use std::fs::File;
+use std::io::{self, Read};
+use std::str;
+
+use magnolia::bencode::*;
+
+fn load_file(path: &str) -> io::Result<()> {
+    let mut buf = Vec::new();
+    let mut f = File::open(path)?;
+    f.read_to_end(&mut buf)?;
+
+    let obj = decode(&buf).unwrap();
+    println!("{:#?}", obj);
+
+    Ok(())
+}
+
+fn main() {
+    let path = env::args().nth(1).expect("need path to .torrent file");
+    load_file(&path).unwrap();
+}
diff --git a/src/bencode/decode.rs b/src/bencode/decode.rs
new file mode 100644
index 0000000..8aeb852
--- /dev/null
+++ b/src/bencode/decode.rs
@@ -0,0 +1,143 @@
+use std::collections::BTreeMap;
+use std::num::ParseIntError;
+use std::str::{self, Utf8Error};
+
+use bencode::Object;
+use buffer::Buffer;
+
+#[derive(Debug)]
+pub struct DecodeError;
+
+impl From<Utf8Error> for DecodeError {
+    fn from(_: Utf8Error) -> Self {
+        DecodeError
+    }
+}
+
+impl From<ParseIntError> for DecodeError {
+    fn from(_: ParseIntError) -> Self {
+        DecodeError
+    }
+}
+
+pub type DecodeResult<T> = Result<T, DecodeError>;
+
+pub fn decode(data: &[u8]) -> DecodeResult<Object> {
+    let mut buf = Buffer::new(data);
+    decode_object(&mut buf)
+}
+
+fn decode_object(buf: &mut Buffer) -> DecodeResult<Object> {
+    match buf.get(0) {
+        Some(b'i') => {
+            buf.advance(1);
+            decode_int(buf, b'e')
+        },
+        Some(b'0' ... b'9') => {
+            decode_bytes(buf)
+        }
+        Some(b'l') => {
+            buf.advance(1);
+            let mut list = Vec::new();
+            while let Some(term) = buf.get(0) {
+                if term == b'e' {
+                    buf.advance(1);
+                    break;
+                }
+                list.push(decode_object(buf)?);
+            }
+            Ok(Object::List(list))
+        }
+        Some(b'd') => {
+            buf.advance(1);
+            let mut dict = BTreeMap::new();
+            while let Some(term) = buf.get(0) {
+                if term == b'e' {
+                    buf.advance(1);
+                    break;
+                }
+                let key = _decode_bytes(buf)?;
+                let val = decode_object(buf)?;
+                dict.insert(key, val);
+            }
+            Ok(Object::Dict(dict))
+        }
+        _ => Err(DecodeError),
+    }
+}
+
+fn decode_int(buf: &mut Buffer, term: u8) -> DecodeResult<Object> {
+    _decode_int(buf, term).map(|num| Object::Int(num))
+}
+
+fn _decode_int(buf: &mut Buffer, term: u8) -> DecodeResult<i64> {
+    if let Some(end) = buf.find(term) {
+        let obj = {
+            let num = str::from_utf8(&buf[..end])?;
+            num.parse()?
+        };
+        buf.advance(end + 1);
+        Ok(obj)
+    } else {
+        Err(DecodeError)
+    }
+}
+
+fn decode_bytes(buf: &mut Buffer) -> DecodeResult<Object> {
+    _decode_bytes(buf).map(|bytes| Object::Bytes(bytes))
+}
+
+fn _decode_bytes(buf: &mut Buffer) -> Result<Vec<u8>, DecodeError> {
+    let size = _decode_int(buf, b':')? as usize;
+    let bytes = buf[..size].to_vec();
+    buf.advance(size);
+    Ok(bytes)
+}
+
+#[test]
+fn test_int_pos() {
+    let mut buf = Buffer::new(b"i1337e");
+    buf.advance(1);
+
+    assert_eq!(decode_int(&mut buf, b'e').unwrap(), Object::Int(1337));
+    assert_eq!(buf.pos(), 6);
+}
+
+#[test]
+fn test_int_neg() {
+    let mut buf = Buffer::new(b"i-1337e");
+    buf.advance(1);
+
+    assert_eq!(decode_int(&mut buf, b'e').unwrap(), Object::Int(-1337));
+    assert_eq!(buf.pos(), 7);
+}
+
+#[test]
+fn test_bytes() {
+    let mut buf = Buffer::new(b"5:hello");
+
+    assert_eq!(decode_bytes(&mut buf).unwrap(), Object::Bytes(b"hello".to_vec()));
+    assert_eq!(buf.pos(), 7);
+}
+
+#[test]
+fn test_list() {
+    let mut buf = Buffer::new(b"li1ei2ei3ee");
+
+    let obj = decode_object(&mut buf).unwrap();
+    let list = obj.as_list().unwrap();
+
+    assert_eq!(list, vec![Object::Int(1), Object::Int(2), Object::Int(3)]);
+    assert_eq!(buf.pos(), 11);
+}
+
+#[test]
+fn test_dict() {
+    let mut buf = Buffer::new(b"d5:helloi1337ee");
+
+    let obj = decode_object(&mut buf).unwrap();
+    let dict = obj.as_dict().unwrap();
+
+    assert_eq!(dict[&b"hello"[..]], Object::Int(1337));
+    assert_eq!(buf.pos(), 15);
+}
diff --git a/src/bencode/mod.rs b/src/bencode/mod.rs
new file mode 100644
index 0000000..a34c164
--- /dev/null
+++ b/src/bencode/mod.rs
@@ -0,0 +1,72 @@
+mod decode;
+
+use std::collections::BTreeMap;
+use std::fmt;
+use std::str;
+
+pub use self::decode::{decode, DecodeError, DecodeResult};
+
+#[derive(Eq, PartialEq)]
+pub enum Object {
+    Int(i64),
+    Bytes(Vec<u8>),
+    List(Vec<Object>),
+    Dict(BTreeMap<Vec<u8>, Object>),
+}
+
+impl Object {
+    pub fn as_int(self) -> Option<i64> {
+        match self {
+            Object::Int(num) => Some(num),
+            _ => None,
+        }
+    }
+
+    pub fn as_bytes(self) -> Option<Vec<u8>> {
+        match self {
+            Object::Bytes(bytes) => Some(bytes),
+            _ => None,
+        }
+    }
+
+    pub fn as_list(self) -> Option<Vec<Object>> {
+        match self {
+            Object::List(list) => Some(list),
+            _ => None,
+        }
+    }
+
+    pub fn as_dict(self) -> Option<BTreeMap<Vec<u8>, Object>> {
+        match self {
+            Object::Dict(dict) => Some(dict),
+            _ => None,
+        }
+    }
+}
+
+impl fmt::Debug for Object {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Object::Int(num) => write!(f, "{}", num),
+            Object::Bytes(ref bytes) => {
+                match str::from_utf8(bytes) {
+                    Ok(s) => write!(f, "{}", s),
+                    Err(_) => {
+                        for &b in bytes.iter() {
+                            write!(f, "{:X}", b)?;
+                        }
+                        Ok(())
+                    }
+                }
+            }
+            Object::List(ref list) => {
+                f.debug_list().entries(list.iter()).finish()
+            }
+            Object::Dict(ref dict) => {
+                f.debug_map()
+                    .entries(dict.iter().map(|(k, v)| (Object::Bytes(k.clone()), v)))
+                    .finish()
+            }
+        }
+    }
+}
diff --git a/src/buffer.rs b/src/buffer.rs
new file mode 100644
index 0000000..8bbebec
--- /dev/null
+++ b/src/buffer.rs
@@ -0,0 +1,96 @@
+use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo};
+
+pub struct Buffer<'b> {
+    inner: &'b [u8],
+    pos: usize,
+}
+
+impl<'b> Buffer<'b> {
+    pub fn new(inner: &[u8]) -> Buffer {
+        Buffer {
+            inner: inner,
+            pos: 0,
+        }
+    }
+
+    pub fn advance(&mut self, amt: usize) {
+        self.pos += amt
+    }
+
+    pub fn find(&self, byte: u8) -> Option<usize> {
+        self[..].iter().position(|&b| b == byte)
+    }
+
+    pub fn get(&self, idx: usize) -> Option<u8> {
+        self[..].get(idx).map(|&b| b)
+    }
+
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+}
+
+impl<'b> Index<Range<usize>> for Buffer<'b> {
+    type Output = [u8];
+
+    fn index(&self, r: Range<usize>) -> &[u8] {
+        &self.inner[self.pos + r.start..self.pos + r.end]
+    }
+}
+
+impl<'b> Index<RangeFrom<usize>> for Buffer<'b> {
+    type Output = [u8];
+
+    fn index(&self, r: RangeFrom<usize>) -> &[u8] {
+        &self.inner[self.pos + r.start..]
+    }
+}
+
+impl<'b> Index<RangeTo<usize>> for Buffer<'b> {
+    type Output = [u8];
+
+    fn index(&self, r: RangeTo<usize>) -> &[u8] {
+        &self.inner[self.pos..self.pos + r.end]
+    }
+}
+
+impl<'b> Index<RangeFull> for Buffer<'b> {
+    type Output = [u8];
+
+    fn index(&self, _: RangeFull) -> &[u8] {
+        &self.inner[self.pos..]
+    }
+}
+
+#[test]
+fn test_advance() {
+    let mut b = Buffer::new(b"hello");
+    b.advance(2);
+    assert_eq!(b.pos(), 2);
+}
+
+#[test]
+fn test_find() {
+    let b = Buffer::new(b"hello");
+    assert_eq!(b.find(b'l'), Some(2));
+    assert_eq!(b.find(b'a'), None);
+}
+
+#[test]
+fn test_get() {
+    let mut b = Buffer::new(b"hello");
+    b.advance(2);
+    assert_eq!(b.get(0), Some(b'l'));
+    assert_eq!(b.get(1), Some(b'l'));
+    assert_eq!(b.get(2), Some(b'o'));
+}
+
+#[test]
+fn test_range() {
+    let mut b = Buffer::new(b"hello");
+    b.advance(2);
+    assert_eq!(b[1..2], b"l"[..]);
+    assert_eq!(b[1..], b"lo"[..]);
+    assert_eq!(b[..2], b"ll"[..]);
+    assert_eq!(b[..], b"llo"[..]);
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..842fca0
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,2 @@
+pub mod bencode;
+pub mod buffer;