引入编码api

This commit is contained in:
2026-01-30 01:11:26 +08:00
parent 94e81cbfd6
commit f59b41c238
4 changed files with 53 additions and 1 deletions

12
Cargo.lock generated
View File

@@ -7,6 +7,7 @@ name = "DnfUtils"
version = "0.1.0"
dependencies = [
"bytes",
"chardetng",
"chrono",
"colored",
"cxx",
@@ -162,6 +163,17 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "chardetng"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
dependencies = [
"cfg-if",
"encoding_rs",
"memchr",
]
[[package]]
name = "chrono"
version = "0.4.43"

View File

@@ -32,6 +32,7 @@ prost = "0.14.3"
tokio-tungstenite = "0.28.0"
bytes = "1.11.0"
spin = "0.10.0"
chardetng = "0.1.17"
[build-dependencies]
cxx-build = "1.0.192"

View File

@@ -7,11 +7,11 @@ use encoding_rs::Encoding;
use futures_util::SinkExt;
use prost::Message as WebMessage;
use spin::Mutex;
use std::sync::OnceLock;
use std::sync::mpsc;
use tokio::net::TcpStream;
use tokio::runtime::Runtime;
use tokio_tungstenite::tungstenite::Message;
use std::sync::OnceLock;
static LOGGER_SENDER: OnceLock<mpsc::Sender<Starpoles>> = OnceLock::new();
@@ -28,6 +28,15 @@ mod ffi {
fn http_get(url: &CxxString) -> Result<String>;
/// Try to convert a string of unknown encoding to UTF-8.
fn to_utf8(string: &CxxString) -> String;
/// Convert UTF-16LE to UTF-8.
fn unicode_to_utf_8(string: &CxxString) -> String;
/// Guess the encoding.
fn guess_encoding(string: &CxxString) -> String;
fn init_log(is_debug: bool, ws_uel: &CxxString);
fn log_trace(msg: &CxxString);
@@ -42,6 +51,28 @@ mod ffi {
}
}
/// Decodes the raw bytes of `string` as UTF-16LE and returns them as a UTF-8 `String`.
///
/// * If the byte length is odd, the final byte is dropped before decoding.
/// * A leading UTF-16LE byte order mark is removed; no other BOM is honoured,
///   so the input is always interpreted as UTF-16LE.
/// * Decoding errors are not reported to the caller (the error flag returned
///   by the decoder is ignored).
fn unicode_to_utf_8(string: &CxxString) -> String {
    let raw = string.as_bytes();
    // A UTF-16 code unit is two bytes wide: clear the lowest bit of the length
    // so a dangling trailing byte is never handed to the decoder.
    let even_len = raw.len() & !1;
    // `decode` would sniff the BOM and could silently switch to UTF-8 or
    // UTF-16BE; `decode_with_bom_removal` keeps the UTF-16LE contract and only
    // strips a BOM that belongs to this encoding.
    let (text, _had_errors) = encoding_rs::UTF_16LE.decode_with_bom_removal(&raw[..even_len]);
    text.into_owned()
}
/// Bridge-side implementation of `guess_encoding`: forwards `string` to
/// `utils::guess_encoding_label` and returns its result unchanged.
fn guess_encoding(string: &CxxString) -> String {
utils::guess_encoding_label(string)
}
/// Bridge-side implementation of `to_utf8`: converts `string` to an owned
/// Rust `String` by delegating to `cxx_string_to_string`.
fn to_utf8(string: &CxxString) -> String {
    // The helper already returns an owned `String`; calling `.to_string()` on
    // it again would only allocate and copy a second time.
    cxx_string_to_string(string)
}
fn log_error(msg: &cxx::CxxString) {
let msg = cxx_string_to_string(msg);
let lock = LOGGER_SENDER.get().unwrap();

View File

@@ -1,5 +1,13 @@
use chardetng::EncodingDetector;
use encoding_rs::Encoding;
/// Runs a `chardetng` encoding detector over the raw bytes of `s` and returns
/// the name of the encoding it guesses.
///
/// The whole string is fed to the detector in a single call, and the guess is
/// made without a top-level-domain hint.
pub fn guess_encoding_label(s: &cxx::CxxString) -> String {
    let mut detector = EncodingDetector::new();
    // Second argument `true`: this is the final (and only) chunk of input.
    detector.feed(s.as_bytes(), true);
    // `None` = no TLD hint; `true` = UTF-8 is allowed as a result.
    let guessed = detector.guess(None, true);
    String::from(guessed.name())
}
pub fn cxx_string_to_string(s: &cxx::CxxString) -> String {
match s.to_str() {
Ok(s) => return s.to_string(),