引入编码api
This commit is contained in:
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -7,6 +7,7 @@ name = "DnfUtils"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chardetng",
|
||||
"chrono",
|
||||
"colored",
|
||||
"cxx",
|
||||
@@ -162,6 +163,17 @@ version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||
|
||||
[[package]]
|
||||
name = "chardetng"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"encoding_rs",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.43"
|
||||
|
||||
@@ -32,6 +32,7 @@ prost = "0.14.3"
|
||||
tokio-tungstenite = "0.28.0"
|
||||
bytes = "1.11.0"
|
||||
spin = "0.10.0"
|
||||
chardetng = "0.1.17"
|
||||
|
||||
[build-dependencies]
|
||||
cxx-build = "1.0.192"
|
||||
|
||||
33
src/lib.rs
33
src/lib.rs
@@ -7,11 +7,11 @@ use encoding_rs::Encoding;
|
||||
use futures_util::SinkExt;
|
||||
use prost::Message as WebMessage;
|
||||
use spin::Mutex;
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::mpsc;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio_tungstenite::tungstenite::Message;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
static LOGGER_SENDER: OnceLock<mpsc::Sender<Starpoles>> = OnceLock::new();
|
||||
|
||||
@@ -28,6 +28,15 @@ mod ffi {
|
||||
|
||||
fn http_get(url: &CxxString) -> Result<String>;
|
||||
|
||||
/// 尝试将未知编码的字符串转换为UTF-8
|
||||
fn to_utf8(string: &CxxString) -> String;
|
||||
|
||||
/// UTF-16LE转换为UTF-8
|
||||
fn unicode_to_utf_8(string: &CxxString) -> String;
|
||||
|
||||
/// 猜测编码
|
||||
fn guess_encoding(string: &CxxString) -> String;
|
||||
|
||||
fn init_log(is_debug: bool, ws_uel: &CxxString);
|
||||
|
||||
fn log_trace(msg: &CxxString);
|
||||
@@ -42,6 +51,28 @@ mod ffi {
|
||||
}
|
||||
}
|
||||
|
||||
fn unicode_to_utf_8(string: &CxxString) -> String {
|
||||
// 约定:传入的内容按 UTF-16LE 字节序解释
|
||||
let mut bytes = string.as_bytes();
|
||||
|
||||
// UTF-16 必须是偶数长度;若是奇数长度,丢弃最后 1 个字节避免越界/误解码
|
||||
if (bytes.len() & 1) == 1 {
|
||||
bytes = &bytes[..bytes.len() - 1];
|
||||
}
|
||||
|
||||
// 用 encoding_rs 直接把 UTF-16LE 转成 Rust 的 UTF-8 String
|
||||
let (cow, _actual_used, _had_errors) = encoding_rs::UTF_16LE.decode(bytes);
|
||||
cow.into_owned()
|
||||
}
|
||||
|
||||
fn guess_encoding(string: &CxxString) -> String {
|
||||
utils::guess_encoding_label(string)
|
||||
}
|
||||
|
||||
fn to_utf8(string: &CxxString) -> String {
|
||||
cxx_string_to_string(string).to_string()
|
||||
}
|
||||
|
||||
fn log_error(msg: &cxx::CxxString) {
|
||||
let msg = cxx_string_to_string(msg);
|
||||
let lock = LOGGER_SENDER.get().unwrap();
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
use chardetng::EncodingDetector;
|
||||
use encoding_rs::Encoding;
|
||||
|
||||
pub fn guess_encoding_label(s: &cxx::CxxString) -> String {
|
||||
let mut det = EncodingDetector::new();
|
||||
det.feed(s.as_bytes(), true);
|
||||
let enc = det.guess(None, true);
|
||||
enc.name().to_string()
|
||||
}
|
||||
|
||||
pub fn cxx_string_to_string(s: &cxx::CxxString) -> String {
|
||||
match s.to_str() {
|
||||
Ok(s) => return s.to_string(),
|
||||
|
||||
Reference in New Issue
Block a user