sync wordlist update

2024-02-11 19:09:39 -06:00
parent 00d497c854
commit 429b775e03
2 changed files with 97 additions and 30 deletions
--- a/data/magic.mse-game/language
+++ b/data/magic.mse-game/language
@@ -14,6 +14,46 @@ lang_name := {
 # Look up the setting named by `input` for the current language.
 # Falls back, in order, to: the language named by the current language's `fallback` entry,
 # the English entry, and finally the empty string.
 lang_setting := {
 	language()[input] or else languages[language().fallback][input] or else languages.English[input] or else ""
 }
 # Build a regex source string from a comma separated list of sub types.
 # The result has the form "^(a b|c d|...)": an alternation of the entries that contain a space,
 # anchored to the start of the string.
 # The steps are chained with + and applied in the order written.
 # NOTE(review): the "eliminate" step only drops a space-less entry when it is followed by "|"
 # (see the lookahead), so a space-less final entry is only removed if the list ends with a
 # separator -- confirm that callers always pass a list with a trailing ",".
 spaced_sub_type_regex :=
 	replace@(match: "'", replace: "’") +					### standardize apostrophes
 	regex_escape +											### preemptive escape
 	replace@(match: ",", replace: "|") +					### match any sub type
 	replace@(match: "(^|\\|)[^ ]+(?=\\|)", replace: "") +	### eliminate sub types that don't contain spaces
 	replace@(match: "^\\|", replace: "") +					### eliminate starting |
 	replace@(match: "\\|$", replace: "") +					### eliminate trailing |
 	{ "^(" + input + ")" }									### only match start of string
 # Concatenate every sub type word list of `input` into one comma separated string.
 # `input` is expected to be one entry of `languages` (that is how replace_spaced_sub_type_map calls it).
 # The single lists (artifact, battle, ...) are appended directly; the grouped lists
 # (basic, race, class, plane) are iterated and each submenu is appended.
 # Every piece is followed by ",", including each submenu of the last group.
 complete_sub_type_list :=
 {
 	input.word_list_artifact + "," +
 	input.word_list_battle + "," +
 	input.word_list_dungeon + "," +
 	input.word_list_land + "," +
 	(for each submenu in input.word_lists_basic do submenu + ",") +
 	input.word_list_enchantment + "," +
 	input.word_list_spell + "," +
 	input.word_list_planeswalker + "," +
 	(for each submenu in input.word_lists_race do submenu + ",") +
 	(for each submenu in input.word_lists_class do submenu + ",") +
 	(for each submenu in input.word_lists_plane do submenu + ",")
 }
 # Map from language code to a replace function for that language.
 # Each function replaces a match of that language's spaced_sub_type_regex with "".
 # Because that regex is anchored with "^", the match can only be at the start of the text.
 # sub_type_filter looks the function up by lang_setting("code").
 # "en_GB" and "en_CA" are built from the same English word lists as "en".
 replace_spaced_sub_type_map :=
 [
 	"de":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["Deutsch"])))
 	"en":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["English"])))
 	"es":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["Español"])))
 	"fr":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["Français"])))
 	"it":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["Italiano"])))
 	"ja":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["日本語"])))
 	"ko":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["한국어"])))
 	"pt-br":	replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["Português do Brasil"])))
 	"ru":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["Русский"])))
 	"zhs":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["汉语"])))
 	"zht":		replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["漢語"])))
 	"en_GB":	replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["English"])))
 	"en_CA":	replace@(replace: "", match: spaced_sub_type_regex(complete_sub_type_list(languages["English"])))
 ]
 # language scripts
 spanish_number := {
 	input := remove_tags(input)
--- a/data/magic.mse-game/script
+++ b/data/magic.mse-game/script
@@ -1019,7 +1019,7 @@ text_filter :=
 		replace: {"<nosym>" + mana_filter_t() + "</nosym>"} ) +
 	# step 5 : add mana & tap symbols
 	replace@(
-		match: "([+=-][XYZ0-9/|]+)",
+		match: "(?<!\\/)([+=-][XYZ0-9|]+)(?!\\/)",
 		in_context: mana_context,
 		replace: {"<sym-auto>" + _1 + "</sym-auto>"} ) +
 	replace@(
@@ -1115,44 +1115,71 @@ super_type_filter := {
 	"<{tag}>{input}</{tag}>"
 }
-break_subtypes := split_text@(match: "<atom-sep>[^<]*</atom-sep>|</?word-list-[^>]*>", include_empty:false)		# splitting at word-list tags is for backwards compatibility, when atom-sep was not yet inserted everywhere.
+# Split the input text at every run of one or more spaces.
+split_at_spaces := split_text@(match: " +")
-break_supertypes := split_text@(match: "<atom-sep>[^<]*</atom-sep>", include_empty:false)
+# Remove the run of spaces, if any, at the very start of the input text.
+remove_leading_spaces := replace@(match: "^ +", replace: "")
-sub_type_filter := {
+sub_type_filter :=
-	input := remove_tag(tag: "<soft")
+{
-	# What word list to use?
+	subtype_separator := lang_setting("subtype_separator")
-	list_type_rest := if      lang_setting("is_creature")(type) or lang_setting("is_kindred")(type) 	then "class-"+lang_setting("code")
+	code := lang_setting("code")
 	input := replace(input, match: "<soft>" + subtype_separator + "</soft>", replace: "")
 	input := remove_tag(input, tag: "<soft")
 	input := remove_tag(input, tag: "<word-list")
 	### What word list to use?
 	list_type_rest := if      lang_setting("is_creature")(type) or lang_setting("is_kindred")(type) 	then "class-"+code
 	                  else if lang_setting("is_land")(type)        										then "land"
 	                  else if lang_setting("is_artifact")(type)    										then "artifact"
 	                  else if lang_setting("is_enchantment")(type) 										then "enchantment"
 	                  else if lang_setting("is_spell")(type)       										then "spell"
 	                  else if lang_setting("is_planeswalker")(type) or lang_setting("is_emblem")(type)	then "planeswalker"
-	                  else if lang_setting("is_plane")(type)											then "plane-"+lang_setting("code")
+	                  else if lang_setting("is_plane")(type)											then "plane-"+code
 	                  else if lang_setting("is_battle")(type)											then "battle"
 	                  else if lang_setting("is_dungeon")(type)											then "dungeon"
 					  else nil
-	if list_type_rest != nil then (
+	if list_type_rest != nil then
-		if lang_setting("is_creature")(type) or lang_setting("is_kindred")(type) then (
+	(
-			list_type_first := "race-"+lang_setting("code")
+		### Transform subtype_separators into spaces, we'll transform them back later
 		input := replace(input, match: subtype_separator, replace: " ")
 		input := replace(input, match: remove_tag(subtype_separator, tag: "<atom-sep"), replace: " ")
 		input := remove_leading_spaces(input)
 		### Use race list for first sub type of creatures
 		list_type_first :=	if lang_setting("is_creature")(type) or lang_setting("is_kindred")(type)
 							then "race-"+code
 							else list_type_rest
 		### Wrap wordlist tag around each part
 		replace_spaced_sub_type := replace_spaced_sub_type_map[code]
 		max_count := 2*length(split_at_spaces(input))-1		### We iterate on the words and the spaces
 		sub_types := for i from 0 to max_count do
 		(
 			if input == "" then "" else (
 			### Check for leading spaces
 			new_input := remove_leading_spaces(input)
 			if new_input != input then
 			(
 				spaces := replace(input, match: regex_escape(new_input) + "$", replace: "")
 				input := new_input
 				spaces
 			) else (
-			list_type_first := list_type_rest
+			### Check for space separated sub types
-		);
+			new_input := replace_spaced_sub_type(input)
-		# wrap wordlist tag around each part
+			if new_input != input then
-		parts := break_subtypes()
+			(
-		checked_first := false
+				sub_type := replace(input, match: regex_escape(new_input) + "$", replace: "")
-		(for each part in parts do
+				input := new_input
-			if trim(part) == "" then ""
+				if i == 0 then "<word-list-{list_type_first}>" + sub_type + "</word-list-{list_type_first}>"
-			else if not checked_first then
+				else "<word-list-{list_type_rest}>" + sub_type + "</word-list-{list_type_rest}>"
-				(checked_first := true; "<word-list-{list_type_first}>{part}</word-list-{list_type_first}>")
+			) else (
-			else
+			### Check for single word sub types
-				languages[lang_name()].subtype_separator + "<word-list-{list_type_rest}>{part}</word-list-{list_type_rest}>"
+			split := split_at_spaces(input)
-		) +
+			input := if length(split) > 1 then replace(input, match: "^" + regex_escape(split.0), replace: "") else ""
-		(if length(parts) > 0 then
+			if i == 0 then "<word-list-{list_type_first}>" + split.0 + "</word-list-{list_type_first}>"
-			# Add a new box at the end
+			else "<word-list-{list_type_rest}>" + split.0 + "</word-list-{list_type_rest}>")))
 			"<soft>{languages[lang_name()].subtype_separator}</soft><word-list-{list_type_rest}></word-list-{list_type_rest}>"
 		 else
 			"<word-list-{list_type_first}></word-list-{list_type_first}>"
 		)
-	) else input # do nothing
+		### Add separators between types, keep additional spaces if there is more than one
 		sub_types := replace(sub_types, match: "(</word-list[^>]*>)( *)(<word-list)" replace: { substring(_2, begin: 1) + _1 + subtype_separator + _3})
 		### Add a new wordlist box at the end
 		if sub_types == "" then "<word-list-{list_type_first}></word-list-{list_type_first}>"
 		else sub_types + "<soft>" + subtype_separator + "</soft><word-list-{list_type_rest}></word-list-{list_type_rest}>"
 	) else input	### Do nothing if we don't know the type
 }
 # all sub types, for word list