diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2e68cdcfc..62c0f8825 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -37,7 +37,7 @@ jobs: zip -rq docs-${TAG_NAME}.zip doc doc-data - name: Release - uses: softprops/action-gh-release@v3 + uses: softprops/action-gh-release@v2 with: files: | docs/docs-${{ github.ref_name }}.tar.gz diff --git a/UnicodeBasic.lean b/UnicodeBasic.lean index eca1c62aa..b55675191 100644 --- a/UnicodeBasic.lean +++ b/UnicodeBasic.lean @@ -58,27 +58,6 @@ namespace Unicode @[inline] def getName (char : Char) : String := lookupName char.val -/-! - ## Script ## --/ - -/-- Get character script - - Unicode property: `Script` --/ -@[inline] -def getScript (char : Char) : Script := lookupScript char.val - -/-- Get script name - - Returns `none` if the script code is unassigned. - - Unicode property: `Script` --/ -@[inline] -def getScriptName? (s : Script) : Option String := - lookupScriptName s |>.map toString - /-! ## Bidirectional Properties ## -/ diff --git a/UnicodeBasic/TableLookup.lean b/UnicodeBasic/TableLookup.lean index 2733a4bce..d4a1eb9dd 100644 --- a/UnicodeBasic/TableLookup.lean +++ b/UnicodeBasic/TableLookup.lean @@ -22,9 +22,6 @@ protected abbrev oMath : UInt64 := 0x800000000 @[extern "unicode_prop_lookup"] protected opaque lookupProp (c : UInt32) : UInt64 -@[extern "unicode_script_lookup"] -protected opaque lookupScript (c : UInt32) : Script - end CLib /-- Binary search -/ @@ -389,22 +386,4 @@ where str : String := include_str "../data/table/White_Space.txt" table : Thunk <| Array (UInt32 × UInt32) := parsePropTable str -/-- Get the script of a code point using lookup table - - Unicode property: `Script` -/ -@[inline] -def lookupScript (c : UInt32) : Script := CLib.lookupScript c - -/-- Get the name of a script - - Unicode property: `Script` -/ -def lookupScriptName (s : Script) : Option String.Slice := - let table := table.get - if s.code < table[0]!.1 then none else - match table[find s.code (fun i => table[i]!.1) 0 table.size.toUSize]! with - | (c, v) => if s.code = c then some v else none -where - str : String := include_str "../data/table/Script_Name.txt" - table : Thunk <| Array (UInt32 × String.Slice) := parseTable str fun _ n => n[0]! - end Unicode diff --git a/UnicodeBasic/Types.lean b/UnicodeBasic/Types.lean index ee0505525..ca6fd1598 100644 --- a/UnicodeBasic/Types.lean +++ b/UnicodeBasic/Types.lean @@ -3,8 +3,6 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ -import Std.Data.HashMap - /-- Low-level conversion from `UInt32` to `Char` (*unsafe*) This function translates to a no-op in the compiler. However, it does not @@ -1013,64 +1011,4 @@ def BidiClass.ofAbbrev! (abbr : String.Slice) : BidiClass := instance : Repr BidiClass where reprPrec bc _ := s!"Unicode.BidiClass.{bc.toAbbrev}" -/-! - ## Scripts ## --/ - -/-- Check if valid script identifier -/ -@[inline] -def Script.isValid (c : UInt32) : Bool := - let c0 := (c >>> 24).toUInt8 - let c1 := (c >>> 16).toUInt8 - let c2 := (c >>> 8).toUInt8 - let c3 := c.toUInt8 - (c0 ≤ 'Z'.toUInt8 && 'A'.toUInt8 ≤ c0) - && (c1 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c1) - && (c2 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c2) - && (c3 ≤ 'z'.toUInt8 && 'a'.toUInt8 ≤ c3) - -/-- Script identifier type -/ -structure Script where - code : UInt32 - is_valid : Script.isValid code -deriving DecidableEq, Hashable - -namespace Script - -/-- Default value is `Zzzz` (`Unknown`) -/ -instance : Inhabited Script where - default := { - code := (((('Z'.val <<< 8 ||| 'z'.val) <<< 8) ||| 'z'.val) <<< 8) ||| 'z'.val - is_valid := by decide - } - -/-- String abbreviation of script -/ -@[extern "unicode_script_to_abbrev"] -def toAbbrev : Script → String - | ⟨c, _⟩ => - let c0 := Char.ofUInt8 (c >>> 24).toUInt8 - let c1 := Char.ofUInt8 (c >>> 16).toUInt8 - let c2 := Char.ofUInt8 (c >>> 8).toUInt8 - let c3 := Char.ofUInt8 c.toUInt8 - String.ofList [c0, c1, c2, c3] - -@[extern "unicode_script_of_abbrev"] -private opaque ofAbbrevAux (abbr : String) : UInt32 - -/-- Get script from abbreviation -/ -def ofAbbrev? (abbr : String.Slice) : Option Script := - if abbr.utf8ByteSize = 4 then - let code := ofAbbrevAux abbr.toString - if h : Script.isValid code then - some ⟨code, h⟩ - else - none - else - none - -@[inline, inherit_doc ofAbbrev?] -def ofAbbrev! (abbr : String.Slice) : Script := ofAbbrev? abbr |>.get! - -end Script - end Unicode diff --git a/UnicodeCLib/basic.c b/UnicodeCLib/basic.c index be91265f0..df7c30123 100644 --- a/UnicodeCLib/basic.c +++ b/UnicodeCLib/basic.c @@ -1,9 +1,5 @@ #include -#ifdef _WIN32 -#include -#else #include -#endif #include "basic.h" static inline int unicode_script_is_valid(uint32_t c) { diff --git a/UnicodeCLib/basic.h b/UnicodeCLib/basic.h index 9a973b54c..752bcc567 100644 --- a/UnicodeCLib/basic.h +++ b/UnicodeCLib/basic.h @@ -69,12 +69,6 @@ LEAN_EXPORT uint64_t unicode_prop_lookup(uint32_t c); LEAN_EXPORT uint64_t unicode_case_lookup(uint32_t c); -LEAN_EXPORT uint32_t unicode_script_lookup(uint32_t c); - -LEAN_EXPORT uint32_t unicode_script_of_abbrev(b_lean_obj_arg abbr); - -LEAN_EXPORT lean_obj_res unicode_script_to_abbrev(uint32_t scr); - #ifdef __cplusplus } #endif diff --git a/UnicodeData.lean b/UnicodeData.lean index 5aeb9a3e2..760376acf 100644 --- a/UnicodeData.lean +++ b/UnicodeData.lean @@ -6,4 +6,3 @@ Released under Apache 2.0 license as described in the file LICENSE. import UnicodeData.Aliases import UnicodeData.Basic import UnicodeData.PropList -import UnicodeData.Scripts diff --git a/lakefile.lean b/lakefile.lean index 9b2d0eb68..6b41b54a4 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -62,5 +62,4 @@ script updateUCD do downloadUCD [ "PropList.txt", "PropertyAliases.txt", "PropertyValueAliases.txt", - "Scripts.txt", ] diff --git a/lean-toolchain b/lean-toolchain index 2210cba4f..6c7e31fff 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.30.0-rc1 +leanprover/lean4:v4.30.0-rc2 diff --git a/makeCLib.lean b/makeCLib.lean index 79d5e611b..0157bdcbc 100644 --- a/makeCLib.lean +++ b/makeCLib.lean @@ -191,45 +191,7 @@ static const unicode_data_t table[] = {" file.putStrLn s!"\{UINT32_C({c₀}),UINT32_C({c₁}),UINT64_C({d})}," file.putStrLn "};" -def scriptTable : Array (UInt32 × UInt32 × UInt64) := - let t := Scripts.data.toArray.flatMap fun (sc, t) => - let sc := Script.ofAbbrev! <| PropertyValueAliases.getShortName! "sc" sc - t.map fun (c₀, c₁) => (c₀, c₁, sc.code.toUInt64) - t.qsort fun (a, _) (b, _) => a < b - -def makeScriptC : IO Unit := - IO.FS.withFile (clibDir / "script.c") .write fun file => do - file.putStrLn s!"/* THIS IS A GENERATED FILE - DO NOT EDIT */ -#include \"basic.h\" - -#define TABLE_SIZE {scriptTable.size} -#define SCRIPT_UNKNOWN UINT32_C({Script.ofAbbrev! "Zzzz" |>.code})" - file.putStrLn " -static const unicode_data_t table[TABLE_SIZE]; - -uint32_t unicode_script_lookup(uint32_t c) { - unicode_data_t v; - if (c >= table[0].cmin) { - unsigned int lo = 0; - unsigned int hi = TABLE_SIZE; - unsigned int i = (lo + hi)/2; - while (i != lo) { - if (c < table[i].cmin) hi = i; - else lo = i; - i = (lo + hi)/2; - } - if (c <= table[i].cmax) return (uint32_t)table[i].data; - } - return SCRIPT_UNKNOWN; -} - -static const unicode_data_t table[] = {" - for (c₀, c₁, d) in scriptTable do - file.putStrLn s!"\{UINT32_C({c₀}),UINT32_C({c₁}),UINT64_C({d})}," - file.putStrLn "};" - def main : IO UInt32 := do makeCaseC makePropC - makeScriptC return 0 diff --git a/makeTables.lean b/makeTables.lean index f7c0e1a7c..dd45121e1 100644 --- a/makeTables.lean +++ b/makeTables.lean @@ -520,12 +520,6 @@ def mkWhiteSpace : Array (UInt32 × UInt32) := | (c₀, some c₁) => (c₀, c₁) | (c₀, none) => (c₀, c₀) -def mkScriptName : Array (UInt32 × String) := - let t := PropertyAliases.getValues! "Script" |>.map fun name => - let s := Script.ofAbbrev! <| PropertyValueAliases.getShortName! "Script" name - (s.code, name.toString) - t.qsort fun (a, _) (b, _) => a < b - def main (args : List String) : IO UInt32 := do let args := if args != [] then args else [ "Bidi_Class", @@ -536,7 +530,6 @@ def main (args : List String) : IO UInt32 := do "Default_Ignorable_Code_Point", "Name", "Numeric_Value", - "Script_Name", "White_Space"] let tableDir : System.FilePath := "."/"data"/"table" IO.FS.createDirAll tableDir @@ -774,13 +767,6 @@ def main (args : List String) : IO UInt32 := do else file.putStrLn <| toHexStringAux c₀ ++ ";" ++ toHexStringAux c₁ IO.println s!"Size: {(statsProp table).1} + {(statsProp table).2}" - | "Script_Name" => - IO.println s!"Generating table {arg}" - let table := mkScriptName - IO.FS.withFile (tableDir/(arg ++ ".txt")) .write fun file => do - for (c, name) in table do - file.putStrLn <| toHexStringAux c ++ ";" ++ name - IO.println s!"Size: {table.size}" | "Titlecase" => IO.println s!"Generating table {arg}" let table ← mkTitlecase