diff --git a/Cargo.toml b/Cargo.toml index d97821a..ffa3152 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "wchar" -version = "0.10.1" +version = "0.11.0" authors = ["Juici "] description = "Procedural macros for compile time UTF-16 and UTF-32 wide strings." edition = "2018" license = "MIT OR Apache-2.0" readme = "README.md" +build = "build.rs" repository = "https://github.com/Juici/wchar-rs" documentation = "https://docs.rs/wchar" @@ -18,7 +19,7 @@ default = [] unstable = ["wchar-impl/unstable"] [dependencies] -wchar-impl = { version = "0.10.0", path = "impl" } +wchar-impl = { version = "0.11.0", path = "impl" } [dev-dependencies] anyhow = "1.0" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..c58c24f --- /dev/null +++ b/build.rs @@ -0,0 +1,300 @@ +use std::collections::HashMap; + +// Each identifier doubles as the string compared against a `CARGO_CFG_TARGET_*` value, so it must be spelled exactly as rustc reports it. +macro_rules! consts { + ($($arch:ident),* $(,)?) => { + $( + #[allow(non_upper_case_globals, dead_code)] + pub const $arch: &str = stringify!($arch); + )* + }; +} + +mod arch { + consts! { + aarch64, + arm, + hexagon, + mips, + mips64, + powerpc, + powerpc64, + riscv32, + riscv64, + sparc, + sparc64, + s390x, + xtensa, + x86, + x86_64, + } +} + +mod os { + consts! { + android, + dragonfly, + emscripten, + freebsd, + fuchsia, + haiku, + hermit, + illumos, + ios, + linux, + l4re, + macos, + netbsd, + openbsd, + psp, + redox, + solaris, + switch, + vxworks, + wasi, + windows, + } +} + +mod family { + consts! { + unix, + windows, + wasm, + } +} + +mod env { + consts! { + gnu, + musl, + msvc, + newlib, + sgx, + uclibc, + wasi, + } +} + +mod vendor { + consts! 
{ + fortanix, + } +} + +struct Cfg { + vars: HashMap<String, String>, +} + +impl Cfg { + fn from_env() -> Cfg { + let vars: HashMap<String, String> = std::env::vars() + .filter(|(key, _)| key.starts_with("CARGO_CFG")) + .collect(); + + Cfg { vars } + } + + /// Looks up a `CARGO_CFG_*` variable by its lowercase cfg name, e.g. `target_os`. + fn cfg(&self, cfg: &str) -> Option<&str> { + let mut cfg = format!("CARGO_CFG_{}", cfg); + cfg["CARGO_CFG_".len()..].make_ascii_uppercase(); + + self.vars.get(&cfg).map(|s| s.as_str()) + } + + /// Whether `target_family` contains `unix`; Cargo joins multi-valued cfgs with commas (e.g. `unix,wasm`). + fn unix(&self) -> bool { + self.family().map_or(false, |f| f.split(',').any(|name| name == family::unix)) + } + + /// Whether `target_family` contains `windows`. + fn windows(&self) -> bool { + self.family().map_or(false, |f| f.split(',').any(|name| name == family::windows)) + } + + fn family(&self) -> Option<&str> { + self.cfg("target_family") + } + + fn os(&self) -> Option<&str> { + self.cfg("target_os") + } + + fn arch(&self) -> Option<&str> { + self.cfg("target_arch") + } + + fn vendor(&self) -> Option<&str> { + self.cfg("target_vendor") + } + + fn env(&self) -> Option<&str> { + self.cfg("target_env") + } + + // fn pointer_width(&self) -> Option<&str> { + // self.cfg("target_pointer_width") + // } + // + // fn endian(&self) -> Option<&str> { + // self.cfg("target_endian") + // } + // + // fn feature(&self, value: &str) -> bool { + // self.features().any(|feature| feature == value) + // } + // + // fn features(&self) -> impl Iterator { + // self.cfg("target_feature").unwrap_or_default().split(',') + // } +} + +enum WChar { + U16, + U32, + I32, + Unknown, +} + +/// Emits `cargo:rustc-cfg=wchar_t="<type>"` for the detected type, or a Cargo warning when it is unknown. +fn main() { + let wchar_t = match get_platform_wchar() { + WChar::U16 => Some("u16"), + WChar::U32 => Some("u32"), + WChar::I32 => Some("i32"), + WChar::Unknown => None, + }; + + match wchar_t { + Some(wchar_t) => println!("cargo:rustc-cfg=wchar_t=\"{}\"", wchar_t), + None => println!("cargo:warning=unknown platform wchar_t"), + } +} + +/// Maps the build target (OS, arch, env, vendor) to its `wchar_t` representation, or `Unknown` when no mapping is listed. +fn get_platform_wchar() -> WChar { + let cfg = Cfg::from_env(); + + if cfg.windows() { + return WChar::U16; + } + + match cfg.os() { + Some(os::fuchsia) => match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::x86_64) => WChar::I32, + _ => WChar::Unknown, + }, + Some(os::switch) => 
WChar::U32, + Some(os::psp) => WChar::Unknown, // TODO: No info in libc. + Some(os::vxworks) => match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::arm) => WChar::U32, + Some(arch::x86 | arch::x86_64) => WChar::I32, + Some(arch::powerpc | arch::powerpc64) => WChar::U32, + _ => WChar::Unknown, + }, + os if cfg.unix() => get_unix_wchar(&cfg, os), + Some(os::hermit) => get_hermit_wchar(&cfg), + os => { + let env = cfg.env(); + if env == Some(env::sgx) || cfg.vendor() == Some(vendor::fortanix) { + WChar::Unknown // TODO: No info in libc. + } else if env == Some(env::wasi) || os == Some(os::wasi) { + WChar::I32 + } else { + WChar::Unknown + } + } + } +} + +fn get_unix_wchar(cfg: &Cfg, os: Option<&str>) -> WChar { + #[allow(non_upper_case_globals)] + const c_int: WChar = WChar::I32; + #[allow(non_upper_case_globals)] + const c_uint: WChar = WChar::U32; + + let env = cfg.env(); + + if let Some(env::newlib) = env { + return match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::arm) => WChar::U32, + Some(arch::powerpc) => c_int, + Some(arch::xtensa) => WChar::U32, + _ => WChar::Unknown, + }; + } + + match os { + Some(os::emscripten) => WChar::I32, + Some(os::linux | os::l4re) => match env { + Some(env::uclibc) => match cfg.arch() { + Some(arch::arm) => c_uint, + Some(arch::mips | arch::mips64) => WChar::I32, + Some(arch::x86_64) => c_int, + _ => WChar::Unknown, + }, + Some(env::musl) => match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::arm) => WChar::U32, + Some(arch::hexagon) => WChar::U32, + Some(arch::mips) => c_int, + Some(arch::mips64) => WChar::I32, + Some(arch::powerpc | arch::powerpc64) => WChar::I32, + Some(arch::s390x) => WChar::I32, + Some(arch::x86 | arch::x86_64) => WChar::I32, + _ => WChar::Unknown, + }, + Some(env::gnu) => match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::arm) => WChar::U32, + Some(arch::mips | arch::mips64) => WChar::I32, + Some(arch::powerpc | arch::powerpc64) => 
WChar::I32, + Some(arch::riscv32 | arch::riscv64) => c_int, + Some(arch::sparc | arch::sparc64) => WChar::I32, + Some(arch::s390x) => WChar::I32, + Some(arch::x86 | arch::x86_64) => WChar::I32, + _ => WChar::Unknown, + }, + _ => WChar::Unknown, + }, + Some(os::android) => match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::arm) => WChar::U32, + Some(arch::x86 | arch::x86_64) => WChar::I32, + _ => WChar::Unknown, + }, + + Some(os::ios | os::macos) => WChar::I32, + Some(os::openbsd | os::netbsd) => WChar::I32, + Some(os::dragonfly) => WChar::I32, + Some(os::freebsd) => match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::arm) => WChar::U32, + Some(arch::powerpc64) => WChar::I32, + Some(arch::x86 | arch::x86_64) => WChar::I32, + _ => WChar::Unknown, + }, + + Some(os::solaris | os::illumos) => c_int, + + Some(os::haiku) => WChar::I32, + + Some(os::hermit) => get_hermit_wchar(cfg), + + Some(os::redox) => WChar::I32, + + _ => WChar::Unknown, + } +} + +fn get_hermit_wchar(cfg: &Cfg) -> WChar { + match cfg.arch() { + Some(arch::aarch64) => WChar::U32, + Some(arch::x86_64) => WChar::I32, + _ => WChar::Unknown, + } +} diff --git a/impl/Cargo.toml b/impl/Cargo.toml index bef3d2f..b3fa424 100644 --- a/impl/Cargo.toml +++ b/impl/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wchar-impl" -version = "0.10.0" +version = "0.11.0" authors = ["Juici "] description = "Internal implementation of wchar." 
edition = "2018" @@ -19,5 +19,3 @@ unstable = ["proc-macro2/nightly"] proc-macro2 = "1.0" quote = "1.0" syn = { version = "1.0", default-features = false, features = ["parsing", "printing", "proc-macro"] } - -libc = { version = "0.2.94", default-features = false } diff --git a/impl/src/encode.rs b/impl/src/encode.rs index 6b55def..736201b 100644 --- a/impl/src/encode.rs +++ b/impl/src/encode.rs @@ -7,7 +7,7 @@ use syn::{Error, LitChar, Result}; use crate::parse::WCharType; -pub fn expand_char(ty: Option, c: LitChar) -> Result { +pub fn expand_char(ty: WCharType, c: LitChar) -> Result { fn quote_char(c: LitChar) -> Result { match T::encode_char(c.value()) { Some(c) => Ok(quote::quote! { #c }), @@ -22,41 +22,38 @@ pub fn expand_char(ty: Option, c: LitChar) -> Result { } match ty { - Some(WCharType::U16(_)) => quote_char::(c), - Some(WCharType::U32(_)) => quote_char::(c), - Some(WCharType::I16(_)) => quote_char::(c), - Some(WCharType::I32(_)) => quote_char::(c), - None => quote_char::(c), + WCharType::U16(_) => quote_char::(c), + WCharType::U32(_) => quote_char::(c), + WCharType::I16(_) => quote_char::(c), + WCharType::I32(_) => quote_char::(c), } } -pub fn expand_str(ty: Option, text: &str) -> TokenStream { +pub fn expand_str(ty: WCharType, text: &str) -> TokenStream { fn quote_str(text: &str) -> TokenStream { let chars = T::encode_str(text); quote::quote! 
{ &[#(#chars),*] } } match ty { - Some(WCharType::U16(_)) => quote_str::(text), - Some(WCharType::U32(_)) => quote_str::(text), - Some(WCharType::I16(_)) => quote_str::(text), - Some(WCharType::I32(_)) => quote_str::(text), - None => quote_str::(text), + WCharType::U16(_) => quote_str::(text), + WCharType::U32(_) => quote_str::(text), + WCharType::I16(_) => quote_str::(text), + WCharType::I32(_) => quote_str::(text), } } -pub fn expand_str_c(ty: Option, text: &str) -> TokenStream { +pub fn expand_str_c(ty: WCharType, text: &str) -> TokenStream { fn quote_str_c(text: &str) -> TokenStream { let chars = T::encode_str_c(text); quote::quote! { &[#(#chars),*] } } match ty { - Some(WCharType::U16(_)) => quote_str_c::(text), - Some(WCharType::U32(_)) => quote_str_c::(text), - Some(WCharType::I16(_)) => quote_str_c::(text), - Some(WCharType::I32(_)) => quote_str_c::(text), - None => quote_str_c::(text), + WCharType::U16(_) => quote_str_c::(text), + WCharType::U32(_) => quote_str_c::(text), + WCharType::I16(_) => quote_str_c::(text), + WCharType::I32(_) => quote_str_c::(text), } } diff --git a/impl/src/lib.rs b/impl/src/lib.rs index 2a82fdf..9d92b07 100644 --- a/impl/src/lib.rs +++ b/impl/src/lib.rs @@ -6,10 +6,8 @@ use std::fs; use std::path::PathBuf; use proc_macro2::{Span, TokenStream}; -use quote::ToTokens; use syn::{Error, Result}; -use crate::encode::Encode; use crate::parse::{IncludeInput, LitStrOrChar, WchInput, WchzInput}; mod encode; @@ -23,25 +21,13 @@ fn expand_macro Result>(f: F) -> proc_macro::TokenSt } } -#[proc_macro] -pub fn wchar_t(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let _: syn::parse::Nothing = syn::parse_macro_input!(input); - - let ty = libc::wchar_t::wchar_type(); - ty.to_token_stream().into() -} - #[proc_macro] pub fn wch(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let WchInput { ty, literal, .. 
} = syn::parse_macro_input!(input); - expand_macro(|| { - let ty = ty.map(|(ty, _)| ty); - - match literal { - LitStrOrChar::Str(lit) => Ok(encode::expand_str(ty, &lit.value())), - LitStrOrChar::Char(lit) => encode::expand_char(ty, lit), - } + expand_macro(|| match literal { + LitStrOrChar::Str(lit) => Ok(encode::expand_str(ty, &lit.value())), + LitStrOrChar::Char(lit) => encode::expand_char(ty, lit), }) } @@ -50,7 +36,6 @@ pub fn wchz(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let WchzInput { ty, literal, .. } = syn::parse_macro_input!(input); expand_macro(|| { - let ty = ty.map(|(ty, _)| ty); let text = literal.value(); if text.as_bytes().contains(&0) { @@ -69,7 +54,6 @@ pub fn include_wch(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let IncludeInput { ty, file_path, .. } = syn::parse_macro_input!(input); expand_macro(|| { - let ty = ty.map(|(ty, _)| ty); let text = read_file(&file_path)?; Ok(encode::expand_str(ty, &text)) @@ -81,7 +65,6 @@ pub fn include_wchz(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let IncludeInput { ty, file_path, .. 
} = syn::parse_macro_input!(input); expand_macro(|| { - let ty = ty.map(|(ty, _)| ty); let text = read_file(&file_path)?; if text.as_bytes().contains(&0) { diff --git a/impl/src/parse.rs b/impl/src/parse.rs index f5d527e..ecddb32 100644 --- a/impl/src/parse.rs +++ b/impl/src/parse.rs @@ -1,6 +1,6 @@ use proc_macro2::TokenStream; use quote::ToTokens; -use syn::parse::{Lookahead1, Parse, ParseStream, Result}; +use syn::parse::{Parse, ParseStream, Result}; use syn::{LitChar, LitStr, Token}; mod kw { @@ -17,15 +17,6 @@ pub enum WCharType { I32(kw::i32), } -impl WCharType { - fn peek(lookahead: &Lookahead1) -> bool { - lookahead.peek(kw::u16) - || lookahead.peek(kw::u32) - || lookahead.peek(kw::i16) - || lookahead.peek(kw::i32) - } -} - impl Parse for WCharType { fn parse(input: ParseStream) -> Result { let lookahead = input.lookahead1(); @@ -73,69 +64,48 @@ impl Parse for LitStrOrChar { } pub struct WchInput { - pub ty: Option<(WCharType, Token![,])>, + pub ty: WCharType, + pub comma: Token![,], pub literal: LitStrOrChar, } impl Parse for WchInput { fn parse(input: ParseStream) -> Result { - let lookahead = input.lookahead1(); - let ty = if WCharType::peek(&lookahead) { - Some((input.parse()?, input.parse()?)) - } else if lookahead.peek(LitStr) || lookahead.peek(LitChar) { - None - } else { - return Err(lookahead.error()); - }; - Ok(WchInput { - ty, + ty: input.parse()?, + comma: input.parse()?, literal: input.parse()?, }) } } pub struct WchzInput { - pub ty: Option<(WCharType, Token![,])>, + pub ty: WCharType, + pub comma: Token![,], pub literal: LitStr, } impl Parse for WchzInput { fn parse(input: ParseStream) -> Result { - let lookahead = input.lookahead1(); - let ty = if WCharType::peek(&lookahead) { - Some((input.parse()?, input.parse()?)) - } else if lookahead.peek(LitStr) { - None - } else { - return Err(lookahead.error()); - }; - Ok(WchzInput { - ty, + ty: input.parse()?, + comma: input.parse()?, literal: input.parse()?, }) } } pub struct IncludeInput { - pub 
ty: Option<(WCharType, Token![,])>, + pub ty: WCharType, + pub comma: Token![,], pub file_path: LitStr, } impl Parse for IncludeInput { fn parse(input: ParseStream) -> Result { - let lookahead = input.lookahead1(); - let ty = if WCharType::peek(&lookahead) { - Some((input.parse()?, input.parse()?)) - } else if lookahead.peek(LitStr) { - None - } else { - return Err(lookahead.error()); - }; - Ok(IncludeInput { - ty, + ty: input.parse()?, + comma: input.parse()?, file_path: input.parse()?, }) } diff --git a/src/lib.rs b/src/lib.rs index 24b68f0..4653793 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,9 +17,42 @@ #![no_std] -/// Platform wide character type. -#[allow(non_camel_case_types)] -pub type wchar_t = wchar_impl::wchar_t!(); +#[doc(hidden)] +pub use wchar_impl as _impl; + +macro_rules! wchar_t { + ($ty:ident) => { + /// Platform wide character type. + #[allow(non_camel_case_types)] + pub type wchar_t = $ty; + + #[doc(hidden)] + #[macro_export] + macro_rules! __expand_platform_wchar { + ($macro:ident, $string:literal) => { + $crate::_impl::$macro!($ty, $string) + }; + } + }; + () => { + #[doc(hidden)] + #[macro_export] + macro_rules! __expand_platform_wchar { + ($macro:ident, $string:literal) => { + ::core::compile_error!("native wchar_t not supported for this platform"); + }; + } + }; +} + +#[cfg(wchar_t = "u16")] +wchar_t!(u16); +#[cfg(wchar_t = "u32")] +wchar_t!(u32); +#[cfg(wchar_t = "i32")] +wchar_t!(i32); +#[cfg(not(any(wchar_t = "u16", wchar_t = "u32", wchar_t = "i32")))] +wchar_t!(); /// Generate a UTF-16 or UTF-32 wide string from a string literal. /// @@ -62,7 +95,15 @@ pub type wchar_t = wchar_impl::wchar_t!(); /// /// assert_eq!(wide_str, expected); /// ``` -pub use wchar_impl::wch; +#[macro_export] +macro_rules! 
wch { + ($ty:ident, $string:literal) => { + $crate::_impl::wch!($ty, $string) + }; + ($string:literal) => { + $crate::__expand_platform_wchar!(wch, $string) + }; +} /// Generate a C-style nul-terminated UTF-16 or UTF-32 wide string from a /// string literal. @@ -103,7 +144,15 @@ pub use wchar_impl::wch; /// /// assert_eq!(wide_str, expected); /// ``` -pub use wchar_impl::wchz; +#[macro_export] +macro_rules! wchz { + ($ty:ident, $string:literal) => { + $crate::_impl::wchz!($ty, $string) + }; + ($string:literal) => { + $crate::__expand_platform_wchar!(wchz, $string) + }; +} /// Generate a UTF-16 or UTF-32 wide string from a UTF-8 encoded file. /// @@ -117,7 +166,15 @@ pub use wchar_impl::wchz; /// Whilst this macro can be used for C-style nul-terminated wide strings, no /// validations are made about internal nul characters. If your strings need to /// be nul-terminated it is recommended to use [`include_wchz`]. -pub use wchar_impl::include_wch; +#[macro_export] +macro_rules! include_wch { + ($ty:ident, $string:literal) => { + $crate::_impl::include_wch!($ty, $string) + }; + ($string:literal) => { + $crate::__expand_platform_wchar!(include_wch, $string) + }; +} /// Generate a UTF-16 or UTF-32 wide string from a UTF-8 encoded file. /// @@ -128,4 +185,12 @@ pub use wchar_impl::include_wch; /// /// The first argument is the output character type, if no type is specified the /// platform native `wchar_t` will be used. -pub use wchar_impl::include_wchz; +#[macro_export] +macro_rules! 
include_wchz { + ($ty:ident, $string:literal) => { + $crate::_impl::include_wchz!($ty, $string) + }; + ($string:literal) => { + $crate::__expand_platform_wchar!(include_wchz, $string) + }; +} diff --git a/tests/ui/invalid_type_wch.stderr b/tests/ui/invalid_type_wch.stderr index a2f1f87..a4935e9 100644 --- a/tests/ui/invalid_type_wch.stderr +++ b/tests/ui/invalid_type_wch.stderr @@ -1,5 +1,7 @@ -error: expected one of: `u16`, `u32`, `i16`, `i32`, string literal, character literal - --> $DIR/invalid_type_wch.rs:3:35 +error: expected one of: `u16`, `u32`, `i16`, `i32` + --> $DIR/invalid_type_wch.rs:3:30 | 3 | const INVALID_TYPE: &[f32] = wch!(f32, "oops"); - | ^^^ + | ^^^^^^^^^^^^^^^^^ + | + = note: this error originates in the macro `wch` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/tests/ui/invalid_type_wchz.stderr b/tests/ui/invalid_type_wchz.stderr index 4f729a4..0c0a69a 100644 --- a/tests/ui/invalid_type_wchz.stderr +++ b/tests/ui/invalid_type_wchz.stderr @@ -1,5 +1,7 @@ -error: expected one of: `u16`, `u32`, `i16`, `i32`, string literal - --> $DIR/invalid_type_wchz.rs:3:36 +error: expected one of: `u16`, `u32`, `i16`, `i32` + --> $DIR/invalid_type_wchz.rs:3:30 | 3 | const INVALID_TYPE: &[f32] = wchz!(f32, "oops"); - | ^^^ + | ^^^^^^^^^^^^^^^^^^ + | + = note: this error originates in the macro `wchz` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/tests/ui_unstable/invalid_type_include_wch.stderr b/tests/ui_unstable/invalid_type_include_wch.stderr index c05f83e..9799985 100644 --- a/tests/ui_unstable/invalid_type_include_wch.stderr +++ b/tests/ui_unstable/invalid_type_include_wch.stderr @@ -1,5 +1,7 @@ -error: expected one of: `u16`, `u32`, `i16`, `i32`, string literal - --> $DIR/invalid_type_include_wch.rs:3:43 +error: expected one of: `u16`, `u32`, `i16`, `i32` + --> $DIR/invalid_type_include_wch.rs:3:30 | 3 | const INVALID_TYPE: &[f32] = include_wch!(f32, "../data/basic.txt"); - | ^^^ + | 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: this error originates in the macro `include_wch` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/tests/ui_unstable/invalid_type_include_wchz.stderr b/tests/ui_unstable/invalid_type_include_wchz.stderr index aed088e..00cf468 100644 --- a/tests/ui_unstable/invalid_type_include_wchz.stderr +++ b/tests/ui_unstable/invalid_type_include_wchz.stderr @@ -1,5 +1,7 @@ -error: expected one of: `u16`, `u32`, `i16`, `i32`, string literal - --> $DIR/invalid_type_include_wchz.rs:3:44 +error: expected one of: `u16`, `u32`, `i16`, `i32` + --> $DIR/invalid_type_include_wchz.rs:3:30 | 3 | const INVALID_TYPE: &[f32] = include_wchz!(f32, "../data/basic.txt"); - | ^^^ + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: this error originates in the macro `include_wchz` (in Nightly builds, run with -Z macro-backtrace for more info)