Fix multi-word subtypes (#31)

This commit is contained in:
GenevensiS
2024-01-28 15:03:12 +01:00
committed by GitHub
parent d3984b48bd
commit cd3c0887a6
2 changed files with 96 additions and 29 deletions

View File

@@ -14,6 +14,46 @@ lang_name := {
# Look up the entry named by `input` in the language tables.
# The lookups are tried in order, each one only when the previous one fails:
#   1. the table returned by language()
#   2. the table in `languages` named by that table's `fallback` entry
#   3. languages.English
#   4. the empty string
# Callers in this file sometimes call the result again, e.g. lang_setting("is_creature")(type),
# so some entries are themselves functions.
lang_setting := {
language()[input] or else languages[language().fallback][input] or else languages.English[input] or else ""
}
### Build a regex that matches, at the start of a string, any sub type containing a space.
### input:  comma separated list of sub types, e.g. "Elf,Time Lord,Goblin,Power Plant,"
### result: regex source, e.g. "^(Time Lord|Power Plant)"
### The steps are composed with +, so they run top to bottom, each on the previous result.
### NOTE(review): the first step deletes every ' from the list, while sub_type_filter matches the
### regex against text that is not stripped the same way. Confirm entries with apostrophes are
### meant to be handled like this.
spaced_sub_type_regex :=
	replace@(match: "'", replace: "") +							### remove apostrophes from the list
	regex_escape +												### preemptive escape, the entries are literal text
	replace@(match: ",", replace: "|") +						### match any sub type
	replace@(match: "(^|\\|)[^ ]+(?=\\|)", replace: "") +		### eliminate sub types that don't contain spaces
	replace@(match: "\\|\\|+", replace: "|") +					### collapse runs of | left by empty entries, an empty alternative would match first and hide the alternatives after it
	replace@(match: "^\\|", replace: "") +						### eliminate starting |
	replace@(match: "\\|$", replace: "") +						### eliminate trailing |
	{ "^(" + input + ")" }										### only match start of string
### Join every sub type word list of one language into a single comma separated string.
### input:  a language table with the word_list_* strings and the word_lists_* collections read below
### result: all lists in a fixed order, each entry followed by ","
complete_sub_type_list :=
{
	### The word_lists_* members hold several submenus each, flatten them first
	basic_lists := (for each menu in input.word_lists_basic do menu + ",")
	race_lists  := (for each menu in input.word_lists_race  do menu + ",")
	class_lists := (for each menu in input.word_lists_class do menu + ",")
	plane_lists := (for each menu in input.word_lists_plane do menu + ",")
	### The remaining members are plain strings
	before_basic := input.word_list_artifact + "," + input.word_list_battle + "," + input.word_list_dungeon + "," + input.word_list_land + ","
	after_basic  := input.word_list_enchantment + "," + input.word_list_spell + "," + input.word_list_planeswalker + ","
	### Same order as the lists are declared: artifact .. land, basic, enchantment .. planeswalker, race, class, plane
	before_basic + basic_lists + after_basic + race_lists + class_lists + plane_lists
}
### Build the function that removes a leading multi-word sub type for one language.
### input:  the name of an entry in `languages`
### result: a replace closure, it returns its text unchanged when no multi-word sub type starts it
spaced_sub_type_remover :=
	{ replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages[input]))) }
### The English word lists serve three language codes, build their regex only once
english_spaced_sub_type_remover := spaced_sub_type_remover("English")
### Language code -> function that strips one multi-word sub type from the start of its input
replace_spaced_sub_type_map :=
	[
		"de":		spaced_sub_type_remover("Deutsch")
		"en":		english_spaced_sub_type_remover
		"es":		spaced_sub_type_remover("Español")
		"fr":		spaced_sub_type_remover("Français")
		"it":		spaced_sub_type_remover("Italiano")
		"ja":		spaced_sub_type_remover("日本語")
		"ko":		spaced_sub_type_remover("한국어")
		"pt-br":	spaced_sub_type_remover("Português do Brasil")
		"ru":		spaced_sub_type_remover("Русский")
		"zhs":		spaced_sub_type_remover("汉语")
		"zht":		spaced_sub_type_remover("漢語")
		"en_GB":	english_spaced_sub_type_remover
		"en_CA":	english_spaced_sub_type_remover
	]
# language scripts
spanish_number := {
input := remove_tags(input)

View File

@@ -1115,44 +1115,71 @@ super_type_filter := {
"<{tag}>{input}</{tag}>"
}
# Split text into parts at every <atom-sep>...</atom-sep> element and at every opening or
# closing word-list tag; empty parts are dropped (include_empty:false).
break_subtypes := split_text@(match: "<atom-sep>[^<]*</atom-sep>|</?word-list-[^>]*>", include_empty:false) # splitting at word-list tags is for backwards compatibility, when atom-sep was not yet inserted everywhere.
# Split text into parts at <atom-sep>...</atom-sep> elements only; empty parts are dropped.
break_supertypes := split_text@(match: "<atom-sep>[^<]*</atom-sep>", include_empty:false)
### Split text at runs of one or more spaces
split_at_spaces := split_text@(match: " +")
### Drop the spaces at the start of the text
remove_leading_spaces := replace@(match: "^ +", replace: "")
### Wrap every sub type in the text in a word-list tag, put the language's separator between
### them and append an empty word-list box for the next sub type.
### input:  the sub type text as typed, it may still carry <soft> and <word-list-…> tags from an earlier pass
### type:   the super type text, read from the calling context, it decides which word list is used
### result: the tagged text, or the input with its <soft> and <word-list-…> tags removed when the type is not recognised
sub_type_filter :=
{
	subtype_separator := lang_setting("subtype_separator")
	code := lang_setting("code")
	### Remove what an earlier pass added. The separator is literal text, so escape it before it goes into a regex
	input := replace(input, match: "<soft>" + regex_escape(subtype_separator) + "</soft>", replace: "")
	input := remove_tag(input, tag: "<soft")
	input := remove_tag(input, tag: "<word-list")
	### What word list to use?
	list_type_rest := if      lang_setting("is_creature")(type) or lang_setting("is_kindred")(type)    then "class-"+code
	                  else if lang_setting("is_land")(type)                                           then "land"
	                  else if lang_setting("is_artifact")(type)                                       then "artifact"
	                  else if lang_setting("is_enchantment")(type)                                    then "enchantment"
	                  else if lang_setting("is_spell")(type)                                          then "spell"
	                  else if lang_setting("is_planeswalker")(type) or lang_setting("is_emblem")(type) then "planeswalker"
	                  else if lang_setting("is_plane")(type)                                          then "plane-"+code
	                  else if lang_setting("is_battle")(type)                                         then "battle"
	                  else if lang_setting("is_dungeon")(type)                                        then "dungeon"
	                  else nil
	if list_type_rest != nil then
	(
		### Transform subtype_separators into spaces, we'll transform them back later
		input := replace(input, match: regex_escape(subtype_separator), replace: " ")
		input := replace(input, match: regex_escape(remove_tag(subtype_separator, tag: "<atom-sep")), replace: " ")
		input := remove_leading_spaces(input)
		### Use race list for first sub type of creatures
		list_type_first := if lang_setting("is_creature")(type) or lang_setting("is_kindred")(type)
			then "race-"+code
			else list_type_rest
		### Languages missing from the map have no multi-word sub types to look for:
		### fall back to a function that changes nothing, so every word becomes its own sub type
		replace_spaced_sub_type := replace_spaced_sub_type_map[code] or else { input }
		### Wrap wordlist tag around each part.
		### Each pass eats one piece from the start of input: a run of spaces, a multi-word sub type or a single word
		max_count := 2*length(split_at_spaces(input))-1		### We iterate on the words and the spaces
		sub_types := for i from 0 to max_count do
		(
			### Only the very first piece uses the first list
			current_list := if i == 0 then list_type_first else list_type_rest
			if input == "" then "" else (
				### Check for leading spaces
				new_input := remove_leading_spaces(input)
				if new_input != input then
				(
					### What was removed is input minus its remaining tail
					spaces := replace(input, match: regex_escape(new_input) + "$", replace: "")
					input := new_input
					spaces
				) else (
					### Check for space separated sub types
					new_input := replace_spaced_sub_type(input)
					if new_input != input then
					(
						sub_type := replace(input, match: regex_escape(new_input) + "$", replace: "")
						input := new_input
						"<word-list-{current_list}>" + sub_type + "</word-list-{current_list}>"
					) else (
						### Check for single word sub types
						words := split_at_spaces(input)
						input := if length(words) > 1 then replace(input, match: "^" + regex_escape(words.0), replace: "") else ""
						"<word-list-{current_list}>" + words.0 + "</word-list-{current_list}>"
					)
				)
			)
		)
		### Add separators between types, keep additional spaces if there are more than one:
		### the first space turns into the separator, the others move in front of the closing tag
		sub_types := replace(sub_types, match: "(</word-list[^>]*>)( *)(<word-list)", replace: { substring(_2, begin: 1) + _1 + subtype_separator + _3 })
		### Add a new wordlist box at the end
		if sub_types == "" then "<word-list-{list_type_first}></word-list-{list_type_first}>"
		else sub_types + "<soft>" + subtype_separator + "</soft><word-list-{list_type_rest}></word-list-{list_type_rest}>"
	) else input		### Do nothing if we don't know the type
}
# all sub types, for word list