提示词
请用rust quick-xml库实现读取xml的row和c标签信息,并输出为csv格式,要求是:数值型c,输出<v>标签的内容;字符串型c(t="inlineStr"),输出<t>标签的内容。row的r属性表示行号,c的r属性是字母列号加行号;如果列号不连续,就在中间补空列。用逗号分隔各列,一行完全输出后换行;如果一行全为空,不输出该行。为便于查对,每行开头输出row的r属性行号。利用内存映射文件实现。只输出代码,不干别的。
use quick_xml::events::Event;
use quick_xml::Reader;
use memmap2::Mmap;
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
fn main() -> io::Result<()> {
let file_path = "input.xml";
let file = File::open(file_path)?;
let mmap = unsafe { Mmap::map(&file)? };
let mut reader = Reader::from_reader(&mmap[..]);
reader.trim_text(true);
let mut buf = Vec::new();
let mut current_row: Option<u32> = None;
let mut current_cells: Vec<Option<String>> = Vec::new();
let mut output = io::stdout();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name().as_ref() {
b"row" => {
if let Some(attrs) = e.attributes().filter_map(Result::ok).find(|a| a.key.as_ref() == b"r") {
if let Ok(r_val) = String::from_utf8(attrs.value.to_vec()) {
current_row = r_val.parse().ok();
}
}
current_cells.clear();
}
b"c" => {
let mut cell_type = None;
let mut cell_ref = None;
for attr in e.attributes().filter_map(Result::ok) {
match attr.key.as_ref() {
b"t" => {
cell_type = String::from_utf8(attr.value.to_vec()).ok();
}
b"r" => {
cell_ref = String::from_utf8(attr.value.to_vec()).ok();
}
_ => {}
}
}
let mut cell_value = None;
let mut in_v = false;
let mut in_t = false;
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name().as_ref() {
b"v" => in_v = true,
b"t" => in_t = true,
_ => {}
}
}
Ok(Event::Text(e)) => {
if in_v || in_t {
if let Ok(text) = e.unescape() {
cell_value = Some(text.to_string());
}
}
}
Ok(Event::End(ref e)) => {
if e.name().as_ref() == b"c" {
break;
}
if e.name().as_ref() == b"v" {
in_v = false;
}
if e.name().as_ref() == b"t" {
in_t = false;
}
}
Ok(Event::Eof) => break,
_ => {}
}
}
if let Some(c_ref) = cell_ref {
if let Some(col) = parse_column(&c_ref) {
while current_cells.len() <= col {
current_cells.push(None);
}
current_cells[col] = cell_value;
}
}
}
_ => {}
}
}
Ok(Event::End(ref e)) => {
if e.name().as_ref() == b"row" {
if let Some(row_num) = current_row {
if current_cells.iter().any(Option::is_some) {
write!(output, "{}", row_num)?;
for cell in ¤t_cells {
write!(output, ",")?;
if let Some(val) = cell {
write!(output, "\"{}\"", val)?;
}
}
writeln!(output)?;
}
}
current_row = None;
current_cells.clear();
}
}
Ok(Event::Eof) => break,
Err(e) => {
eprintln!("Error reading XML: {}", e);
break;
}
_ => {}
}
buf.clear();
}
Ok(())
}
/// Converts the leading letters of a cell reference into a zero-based column
/// index (`"A1"` -> 0, `"Z9"` -> 25, `"AA3"` -> 26).
///
/// Letters are read case-insensitively as a bijective base-26 number and
/// scanning stops at the first character that is not an ASCII letter.
/// Returns `None` when the reference does not start with a letter.
fn parse_column(ref_str: &str) -> Option<usize> {
    ref_str
        .chars()
        .take_while(|ch| ch.is_ascii_alphabetic())
        .fold(None, |acc: Option<usize>, letter| {
            // 'A' maps to 1 so that "AA" follows "Z" directly.
            let digit = letter.to_ascii_uppercase() as usize - 'A' as usize + 1;
            Some(acc.unwrap_or(0) * 26 + digit)
        })
        .map(|one_based| one_based - 1)
}
相应Cargo.toml
[package]
name = "xml-to-csv"
version = "0.1.0"
edition = "2024"
[dependencies]
quick-xml = "0.31.0"
memmap2 = "0.9.0"
编译执行
export CARGO_INCREMENTAL=1
export PATH=/par:/par/mold240/bin:$PATH
cargo build --release
/par/xmlcsv/target/release# ./xml-to-csv
1,"1","15519","785","1","17.00","24386.67","0.04","0.02","N","O","35137.0","35107.0","35146.0","DELIVER IN PERSON","TRUCK","to beans x-ray carefull"
2,"1","6731","732","2","36.00","58958.28","0.09","0.06","N","O","35167.0","35123.0","35175.0","TAKE BACK RETURN","MAIL","according to the final foxes. qui"
/par# time xmlcsv/target/release/xml-to-csv >quickxml.csv
real 1m28.133s
user 0m5.104s
sys 0m5.273s