-- Module:okm-translit

-- Table of public functions returned at the end of the module.
local export = {}
-- Short alias for the Unicode-aware substitution function.
local gsub = mw.ustring.gsub

-- Character sets of the Han and Hangul scripts as provided by Module:scripts.
-- Both values are spliced into `[...]` pattern classes further down, so the
-- accessor has to be *called*; a bare `:getCharacters` is a syntax error.
local chars_Hani = require('Module:scripts').getByCode('Hani'):getCharacters()
local chars_Hang = require('Module:scripts').getByCode('Hang'):getCharacters()

-- Lookup table that splits a complex or compatibility jamo into a sequence of
-- simple jamo. export.tr applies it to every character of the NFD-normalized
-- text via gsub(text, '.', tt_complex); characters without an entry are kept.
-- The fullwidth "＠" in vowel expansions is a placeholder that the rule table
-- `tt` romanizes as "y".
--
-- Sources:
-- https://github.com/szc126/rime-slg-korean/blob/main/slg_break_jamo.yaml
-- https://github.com/szc126/rime-slg-korean/blob/main/soolegi_yethangeul.custom.yaml
local tt_complex = {
	-- initial consonant clusters (expand to the choseong listed in `tt`)
	['ᄢ']='ᄇᄉᄀ', ['ᄣ']='ᄇᄉᄃ', ['ᄤ']='ᄇᄉᄇ', ['ᄥ']='ᄇᄉᄉ', ['ᄦ']='ᄇᄉᄌ', ['ᄳ']='ᄉᄇᄀ', ['ᄴ']='ᄉᄉᄉ',
	['ꥥ']='ᄅᄀᄀ', ['ꥧ']='ᄅᄃᄃ', ['ꥪ']='ᄅᄇᄇ', ['ꥲ']='ᄇᄉᄐ', ['ꥵ']='ᄉᄉᄇ', ['ꥸ']='ᄌᄌᄒ',
	['ᄁ']='ᄀᄀ', ['ᄄ']='ᄃᄃ', ['ᄈ']='ᄇᄇ', ['ᄊ']='ᄉᄉ', ['ᄍ']='ᄌᄌ', ['ᄓ']='ᄂᄀ', ['ᄔ']='ᄂᄂ', ['ᄕ']='ᄂᄃ',
	['ᄖ']='ᄂᄇ', ['ᄗ']='ᄃᄀ', ['ᄘ']='ᄅᄂ', ['ᄙ']='ᄅᄅ', ['ᄚ']='ᄅᄒ', ['ᄜ']='ᄆᄇ', ['ᄞ']='ᄇᄀ', ['ᄟ']='ᄇᄂ',
	['ᄠ']='ᄇᄃ', ['ᄡ']='ᄇᄉ', ['ᄧ']='ᄇᄌ', ['ᄨ']='ᄇᄎ', ['ᄩ']='ᄇᄐ', ['ᄪ']='ᄇᄑ', ['ᄬ']='ᄫᄫ', ['ᄭ']='ᄉᄀ',
	['ᄮ']='ᄉᄂ', ['ᄯ']='ᄉᄃ', ['ᄰ']='ᄉᄅ', ['ᄱ']='ᄉᄆ', ['ᄲ']='ᄉᄇ', ['ᄵ']='ᄉᄋ', ['ᄶ']='ᄉᄌ', ['ᄷ']='ᄉᄎ',
	['ᄸ']='ᄉᄏ', ['ᄹ']='ᄉᄐ', ['ᄺ']='ᄉᄑ', ['ᄻ']='ᄉᄒ', ['ᄽ']='ᄼᄼ', ['ᄿ']='ᄾᄾ', ['ᅁ']='ᄋᄀ', ['ᅂ']='ᄋᄃ',
	['ᅃ']='ᄋᄆ', ['ᅄ']='ᄋᄇ', ['ᅅ']='ᄋᄉ', ['ᅆ']='ᄋᅀ', ['ᅇ']='ᄋᄋ', ['ᅈ']='ᄋᄌ', ['ᅉ']='ᄋᄎ', ['ᅊ']='ᄋᄐ',
	['ᅋ']='ᄋᄑ', ['ᅍ']='ᄌᄋ', ['ᅏ']='ᅎᅎ', ['ᅑ']='ᅐᅐ', ['ᅒ']='ᄎᄏ', ['ᅓ']='ᄎᄒ', ['ᅖ']='ᄑᄇ', ['ᅘ']='ᄒᄒ',
	['ᅚ']='ᄀᄃ', ['ᅛ']='ᄂᄉ', ['ᅜ']='ᄂᄌ', ['ᅝ']='ᄂᄒ', ['ᅞ']='ᄃᄅ', ['ꥠ']='ᄃᄆ', ['ꥡ']='ᄃᄇ', ['ꥢ']='ᄃᄉ',
	['ꥣ']='ᄃᄌ', ['ꥤ']='ᄅᄀ', ['ꥦ']='ᄅᄃ', ['ꥨ']='ᄅᄆ', ['ꥩ']='ᄅᄇ', ['ꥫ']='ᄅᄫ', ['ꥬ']='ᄅᄉ', ['ꥭ']='ᄅᄌ',
	['ꥮ']='ᄅᄏ', ['ꥯ']='ᄆᄀ', ['ꥰ']='ᄆᄃ', ['ꥱ']='ᄆᄉ', ['ꥳ']='ᄇᄏ', ['ꥴ']='ᄇᄒ', ['ꥶ']='ᄋᄅ', ['ꥷ']='ᄋᄒ',
	['ꥹ']='ᄐᄐ', ['ꥺ']='ᄑᄒ', ['ꥻ']='ᄒᄉ', ['ꥼ']='ᅙᅙ',

	-- complex vowels (expand to the jungseong listed in `tt`)
	['ᆅ']='＠ᅩ＠ᅡ＠', ['ᆒ']='＠ᅮ＠ᅥ＠', ['ᅹ']='＠ᅡ＠ᅩ', ['ᆄ']='＠ᅩ＠ᅡ', ['ᆆ']='＠ᅩ＠ᅥ', ['ᆑ']='＠ᅮ＠ᅥ', ['ᆥ']='＠ᅥ＠ᅡ',
	['ᆐ']='＠ᅮᅥ＠', ['ힳ']='＠ᅩᅡ＠', ['ힷ']='＠ᅮᅡ＠', ['ᆁ']='ᅩ＠ᅥ＠', ['ᆌ']='ᅮ＠ᅥ＠', ['ᆧ']='ᅩ＠ᅡ＠', ['ힽ']='ᅵ＠ᅡᅩ',
	['ힾ']='ᅵ＠ᅡ＠', ['ퟀ']='ᅵ＠ᅥ＠', ['ᅤ']='＠ᅡ＠', ['ᅨ']='＠ᅥ＠', ['ᅸ']='＠ᅡᅩ', ['ᅽ']='＠ᅥᅩ', ['ᅾ']='＠ᅥᅮ',
	['ᆇ']='＠ᅩᅩ', ['ᆈ']='＠ᅩ＠', ['ᆎ']='＠ᅮᅡ', ['ᆏ']='＠ᅮᅥ', ['ᆓ']='＠ᅮᅮ', ['ᆔ']='＠ᅮ＠', ['ᆤ']='＠ᅡᅮ',
	['ힲ']='＠ᅩᅡ', ['ힴ']='＠ᅩᅥ', ['ힸ']='＠ᅮᅩ', ['ᆙ']='ᅵ＠ᅡ', ['ᆦ']='ᅩ＠ᅡ', ['ힰ']='ᅩ＠ᅥ', ['ힵ']='ᅮ＠ᅥ',
	['ힿ']='ᅵ＠ᅥ', ['ퟂ']='ᅵ＠ᅩ', ['ퟃ']='ᅵ＠ᅮ', ['ᅫ']='ᅩᅡ＠', ['ᅰ']='ᅮᅥ＠', ['ᆀ']='ᅩᅥ＠', ['ᆊ']='ᅮᅡ＠',
	['ᆋ']='ᅮᅥᅳ', ['ᆗ']='ᅳᅵᅮ', ['ힱ']='ᅩᅩᅵ', ['ힶ']='ᅮᅵ＠', ['ힻ']='ᅳᅥ＠', ['ퟁ']='ᅵᅩᅵ', ['ퟆ']='ᆞᅥ＠',
	['ᅣ']='＠ᅡ', ['ᅧ']='＠ᅥ', ['ᅭ']='＠ᅩ', ['ᅲ']='＠ᅮ', ['ᅢ']='ᅡ＠', ['ᅦ']='ᅥ＠', ['ᅪ']='ᅩᅡ', ['ᅬ']='ᅩ＠',
	['ᅯ']='ᅮᅥ', ['ᅱ']='ᅮ＠', ['ᅴ']='ᅳ＠', ['ᅶ']='ᅡᅩ', ['ᅷ']='ᅡᅮ', ['ᅺ']='ᅥᅩ', ['ᅻ']='ᅥᅮ', ['ᅼ']='ᅥᅳ',
	['ᅿ']='ᅩᅥ', ['ᆂ']='ᅩᅩ', ['ᆃ']='ᅩᅮ', ['ᆉ']='ᅮᅡ', ['ᆍ']='ᅮᅮ', ['ᆕ']='ᅳᅮ', ['ᆖ']='ᅳᅳ', ['ᆘ']='ᅵᅡ',
	['ᆚ']='ᅵᅩ', ['ᆛ']='ᅵᅮ', ['ᆜ']='ᅵᅳ', ['ᆝ']='ᅵᆞ', ['ᆟ']='ᆞᅥ', ['ᆠ']='ᆞᅮ', ['ᆡ']='ᆞ＠', ['ᆢ']='ᆞᆞ',
	['ᆣ']='ᅡᅳ', ['ힹ']='ᅳᅡ', ['ힺ']='ᅳᅥ', ['ힼ']='ᅳᅩ', ['ퟄ']='ᅵ＠', ['ퟅ']='ᆞᅡ',

	-- final consonant clusters (expand to the jongseong listed in `tt`)
	['ᇄ']='ᆨᆺᆨ', ['ᇌ']='ᆯᆨᆺ', ['ᇏ']='ᆯᆮᇂ', ['ᇑ']='ᆯᆷᆨ', ['ᇒ']='ᆯᆷᆺ', ['ᇓ']='ᆯᆸᆺ', ['ᇔ']='ᆯᆸᇂ',
	['ᇖ']='ᆯᆺᆺ', ['ᇞ']='ᆷᆺᆺ', ['ᇭ']='ᇰᆨᆨ', ['ퟎ']='ᆮᆮᆸ', ['ퟑ']='ᆮᆺᆨ', ['ퟕ']='ᆯᆨᆨ', ['ퟖ']='ᆯᆨᇂ',
	['ퟗ']='ᆯᆯᆿ', ['ퟘ']='ᆯᆷᇂ', ['ퟙ']='ᆯᆸᆮ', ['ퟚ']='ᆯᆸᇁ', ['ퟜ']='ᆯᇹᇂ', ['ퟟ']='ᆷᆫᆫ', ['ퟡ']='ᆷᆸᆺ',
	['ퟤ']='ᆸᆯᇁ', ['ퟧ']='ᆸᆺᆮ', ['ퟬ']='ᆺᆺᆨ', ['ퟭ']='ᆺᆺᆮ', ['ퟸ']='ᆽᆸᆸ',
	['ᆩ']='ᆨᆨ', ['ᆪ']='ᆨᆺ', ['ᆬ']='ᆫᆽ', ['ᆭ']='ᆫᇂ', ['ᆰ']='ᆯᆨ', ['ᆱ']='ᆯᆷ', ['ᆲ']='ᆯᆸ', ['ᆳ']='ᆯᆺ',
	['ᆴ']='ᆯᇀ', ['ᆵ']='ᆯᇁ', ['ᆶ']='ᆯᇂ', ['ᆹ']='ᆸᆺ', ['ᆻ']='ᆺᆺ', ['ᇃ']='ᆨᆯ', ['ᇅ']='ᆫᆨ', ['ᇆ']='ᆫᆮ',
	['ᇇ']='ᆫᆺ', ['ᇈ']='ᆫᇫ', ['ᇉ']='ᆫᇀ', ['ᇊ']='ᆮᆨ', ['ᇋ']='ᆮᆯ', ['ᇍ']='ᆯᆫ', ['ᇎ']='ᆯᆮ', ['ᇐ']='ᆯᆯ',
	['ᇕ']='ᆯᇦ', ['ᇗ']='ᆯᇫ', ['ᇘ']='ᆯᆿ', ['ᇙ']='ᆯᇹ', ['ᇚ']='ᆷᆨ', ['ᇛ']='ᆷᆯ', ['ᇜ']='ᆷᆸ', ['ᇝ']='ᆷᆺ',
	['ᇟ']='ᆷᇫ', ['ᇠ']='ᆷᆾ', ['ᇡ']='ᆷᇂ', ['ᇣ']='ᆸᆯ', ['ᇤ']='ᆸᇁ', ['ᇥ']='ᆸᇂ', ['ᇧ']='ᆺᆨ', ['ᇨ']='ᆺᆮ',
	['ᇩ']='ᆺᆯ', ['ᇪ']='ᆺᆸ', ['ᇬ']='ᇰᆨ', ['ᇮ']='ᇰᇰ', ['ᇯ']='ᇰᆿ', ['ᇱ']='ᇰᆺ', ['ᇲ']='ᇰᇫ', ['ᇳ']='ᇁᆸ',
	['ᇵ']='ᇂᆫ', ['ᇶ']='ᇂᆯ', ['ᇷ']='ᇂᆷ', ['ᇸ']='ᇂᆸ', ['ᇺ']='ᆨᆫ', ['ᇻ']='ᆨᆸ', ['ᇼ']='ᆨᆾ', ['ᇽ']='ᆨᆿ',
	['ᇾ']='ᆨᇂ', ['ᇿ']='ᆫᆫ', ['ퟋ']='ᆫᆯ', ['ퟌ']='ᆫᆾ', ['ퟍ']='ᆮᆮ', ['ퟏ']='ᆮᆸ', ['ퟐ']='ᆮᆺ', ['ퟒ']='ᆮᆽ',
	['ퟓ']='ᆮᆾ', ['ퟔ']='ᆮᇀ', ['ퟛ']='ᆯᇰ', ['ퟞ']='ᆷᆫ', ['ퟠ']='ᆷᆷ', ['ퟢ']='ᆷᆽ', ['ퟣ']='ᆸᆮ', ['ퟥ']='ᆸᆷ',
	['ퟦ']='ᆸᆸ', ['ퟨ']='ᆸᆽ', ['ퟩ']='ᆸᆾ', ['ퟪ']='ᆺᆷ', ['ퟫ']='ᆺᇦ', ['ퟮ']='ᆺᇫ', ['ퟯ']='ᆺᆽ', ['ퟰ']='ᆺᆾ',
	['ퟱ']='ᆺᇀ', ['ퟲ']='ᆺᇂ', ['ퟳ']='ᇫᆸ', ['ퟴ']='ᇫᇦ', ['ퟵ']='ᇰᆷ', ['ퟶ']='ᇰᇂ', ['ퟷ']='ᆽᆸ', ['ퟹ']='ᆽᆽ',
	['ퟺ']='ᇁᆺ', ['ퟻ']='ᇁᇀ',

	-- compatibility jamo
	['ㅩ']='ᄅᄀᄉ', ['ㅫ']='ᄅᄇᄉ', ['ㅴ']='ᄇᄉᄀ', ['ㅵ']='ᄇᄉᄃ',
	['ㄲ']='ᄀᄀ', ['ㄸ']='ᄃᄃ', ['ㅃ']='ᄇᄇ', ['ㄳ']='ᄀᄉ', ['ㄵ']='ᄂᄌ', ['ㄶ']='ᄂᄒ', ['ㄺ']='ᄅᄀ', ['ㄻ']='ᄅᄆ',
	['ㄼ']='ᄅᄇ', ['ㄽ']='ᄅᄉ', ['ㄾ']='ᄅᄐ', ['ㄿ']='ᄅᄑ', ['ㅀ']='ᄅᄒ', ['ㅄ']='ᄇᄉ', ['ㅆ']='ᄉᄉ', ['ㅉ']='ᄌᄌ',
	['ㅥ']='ᄂᄂ', ['ㅦ']='ᄂᄃ', ['ㅧ']='ᄂᄉ', ['ㅨ']='ᄂᅀ', ['ㅪ']='ᄅᄃ', ['ㅬ']='ᄅᅀ', ['ㅭ']='ᄅᅙ', ['ㅮ']='ᄆᄇ',
	['ㅯ']='ᄆᄉ', ['ㅰ']='ᄆᅀ', ['ㅲ']='ᄇᄀ', ['ㅳ']='ᄇᄃ', ['ㅶ']='ᄇᄌ', ['ㅷ']='ᄇᄐ', ['ㅹ']='ᄫᄫ', ['ㅺ']='ᄉᄀ',
	['ㅻ']='ᄉᄂ', ['ㅼ']='ᄉᄃ', ['ㅽ']='ᄉᄇ', ['ㅾ']='ᄉᄌ', ['ㆀ']='ᄋᄋ', ['ㆂ']='ᅌᄉ', ['ㆃ']='ᅌᅀ', ['ㆅ']='ᄒᄒ',
	['ㄱ']='ᄀ', ['ㄴ']='ᄂ', ['ㄷ']='ᄃ', ['ㄹ']='ᄅ', ['ㅁ']='ᄆ', ['ㅂ']='ᄇ', ['ㅅ']='ᄉ', ['ㅇ']='ᄋ',
	['ㅈ']='ᄌ', ['ㅊ']='ᄎ', ['ㅋ']='ᄏ', ['ㅌ']='ᄐ', ['ㅍ']='ᄑ', ['ㅎ']='ᄒ',
	['ㅤ']='ᅟ', -- filler
	['ㅱ']='ᄝ', ['ㅸ']='ᄫ', ['ㅿ']='ᅀ', ['ㆁ']='ᅌ', ['ㆄ']='ᅗ', ['ㆆ']='ᅙ',

	-- compatibility vowels
	['ㆈ']='＠ᅩ＠ᅡᅵ', ['ㆋ']='＠ᅮ＠ᅥᅵ', ['ㆇ']='＠ᅩ＠ᅡ', ['ㆊ']='＠ᅮ＠ᅥ', ['ㅒ']='＠ᅡᅵ', ['ㅖ']='＠ᅥᅵ',
	['ㅙ']='ᅩᅡᅵ', ['ㅞ']='ᅮᅥᅵ', ['ㆉ']='＠ᅩᅵ', ['ㆌ']='＠ᅮᅵ', ['ㅐ']='ᅡᅵ', ['ㅑ']='＠ᅡ', ['ㅔ']='ᅥᅵ',
	['ㅕ']='＠ᅥ', ['ㅘ']='ᅩᅡ', ['ㅚ']='ᅩᅵ', ['ㅛ']='＠ᅩ', ['ㅝ']='ᅮᅥ', ['ㅟ']='ᅮᅵ', ['ㅠ']='＠ᅮ',
	['ㅢ']='ᅳᅵ', ['ㅏ']='ᅡ', ['ㅓ']='ᅥ', ['ㅗ']='ᅩ', ['ㅜ']='ᅮ', ['ㅡ']='ᅳ', ['ㅣ']='ᅵ', ['ㆍ']='ᆞ',
}

-- Ordered rewrite rules applied by export.tr, one rule per line:
--   pattern<TAB>replacement
-- * `pattern` is a Lua (mw.ustring) pattern, `replacement` a gsub replacement.
-- * A replacement of "×" stands for the empty string.
-- * "#" starts a comment that runs to the end of the line; comments and blank
--   lines are stripped before the rules are parsed.
-- * "BREAK<TAB>1" and "BREAK<TAB>2" are not substitutions: they mark the points
--   where export.tr runs its hard-coded processing steps.
-- Every rule must sit on its own line and contain exactly one separating tab,
-- because the parser splits on newlines and then matches "(.+)\t(.+)".
local tt = [==[
BREAK	1

%([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏]+%)	×
# remove hanja from (ex.) 사뎐(辭典)
# caps prob. isn't necessary since the "base" text is actually hangeul?
# Hani regex is a reasonable subset of Hani from Module:scripts/data,
# last checked on 20220221

# to yale

gᄋ	Ğ # voiced velar fricative /ɣ/
ᄋᄋ	Ő
＠ᅮ	yu
＠ᅩ	yo
ᅩᅡ	wa
ᅮᅥ	we
ᅵᆞ	yo
ᆞᆞ	yo
# non-simple

ᄀ	K
ᄂ	N
ᄃ	T
ᄅ	L
ᄆ	M
ᄇ	P
ᄉ	S
ᄋ	Ø
ᄌ	C
ᄎ	CH
ᄏ	KH
ᄐ	TH
ᄑ	PH
ᄒ	H
ᄝ	◆
ᄫ	Ƃ
ᅗ	◆
ᄛ	◆
ᅌ	Ŋ
ᅀ	Z
ᅙ	Q
ᄼ	◆
ᅎ	◆
ᅔ	◆
ᄾ	◆
ᅐ	◆
ᅕ	◆
ᅟ	× # filler
# choseong

＠	y
ᅡ	a
ᅥ	e
ᅩ	wo
ᅮ	wu
ᅳ	u
ᅵ	i
ᆞ	o
ᅠ	× # filler
# jungseong

ᆨ	k
ᆫ	n
ᆮ	t
ᆯ	l
ᆷ	m
ᆸ	p
ᆺ	s
ᆼ	ø
ᆽ	c
ᆾ	ch
ᆿ	kh
ᇀ	th
ᇁ	ph
ᇂ	h
ᇢ	◆
ᇦ	ƃ
ᇴ	◆
ퟝ	◆
ᇰ	ŋ
ᇫ	z
ᇹ	q
# jongseong

〮	↑
〯	→
# tone

([aiueo]+)([y]?)([↑→↓])	%1%3%2
# tone diacritic location

%-%-%-%-(.-[wyaiueo↑→↓]+)(y)	%1-%2
%-%-%-(.-[wyaiueo↑→↓]+[^wyaiueo ])([^wyaiueo ])	%1-%2
%-%-%-(.-[wyaiueo↑→↓]+)	%1-
%-%-(.-)([wyaiueo])	%1-%2
# hyphens within syllables
# CV-y
# CVC-C
# CV-C
# C-V

(%))(%-?)i	%1%2y
# 子(ᄌᆞ)ㅣ

Ø	×

BREAK	2

↑	́
→	̌
↓	̀

ğ	G
ő	OO
Ø	NG # capitalized hanja readings
ø	ng
ƃ	W
Ŋ	NG # capitalized hanja readings
ŋ	ng
]==]

-- Prepare the rule text for line-by-line parsing in export.tr. Each step must
-- be its own statement on its own line: a trailing `--` comment would
-- otherwise swallow whatever follows it.
tt = mw.text.trim(tt)
tt = mw.ustring.gsub(tt, '%s*#[^\n]+', '') -- remove comments
tt = mw.ustring.gsub(tt, '\n+', '\n') -- remove empty lines

-- Character lists that export.tr splices into `[...]` pattern classes.
-- They mirror the rule groups of the same name in `tt`.
-- NOTE: `a` and `b` each end with an invisible filler character.
local a = 'ᄀᄂᄃᄅᄆᄇᄉᄋᄌᄎᄏᄐᄑᄒᄝᄫᅗᄛᅌᅀᅙᄼᅎᅔᄾᅐᅕᅟ' -- choseong
local b = '＠ᅡᅥᅩᅮᅳᅵᆞᅠ' -- jungseong (plus the "＠" placeholder)
local c = 'ᆨᆫᆮᆯᆷᆸᆺᆼᆽᆾᆿᇀᇁᇂᇢᇦᇴퟝᇰᇫᇹ' -- jongseong
local d = '〮〯' -- tone marks

--- Romanize a string that contains Hangul by running it through the rule
-- table `tt`.
-- @param text  string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the romanized string, or nil when `text` contains no character
--         from `chars_Hang`
function export.tr(text, lang, sc)
	-- drop ruby markup tags (<ruby>, <rp>, <rt> and their closing forms)
	text = gsub(text, "%<%/?r[pt]%>", "")
	text = gsub(text, "%<%/?ruby%>", "")

	if not mw.ustring.match(text, '[' .. chars_Hang .. ']') then
		return nil
	end

	-- truthy when the input contains at least one of the tone marks in `d`
	local bool_tone_marking = mw.ustring.find(text, ('[%s]'):format(d))

	-- canonical decomposition first, then split complex/compatibility jamo
	-- into simple jamo, one input character at a time
	text = mw.ustring.toNFD(text)
	text = gsub(text, '.', tt_complex)

	for line in mw.text.gsplit(tt, '\n') do
		local pattern, repl = mw.ustring.match(line, '(.+)\t(.+)')

		if not pattern then
			-- Not a "pattern<TAB>replacement" rule (blank or malformed line).
			-- Skip it instead of failing on a nil concatenation.
		elseif pattern == 'BREAK' and repl == '1' then
			-- add period between hanja readings
			text = gsub(text, '([' .. chars_Hani .. '])%((.-)%)', function(hanja, reading)
				local dotted = gsub(reading, ('([%s]+)'):format(a), '.%1')
				return hanja .. '(' .. dotted .. ')'
			end)

			if bool_tone_marking then
				-- move the location of tone marks for easier handling and
				-- mark low tone
				local syllable = ('([%s]+)([%s]+)([%s]*)([%s]*)'):format(a, b, c, d)
				text = gsub(text, syllable, function(initial, vowel, final, tone)
					-- a syllable without a tone mark gets the "↓" marker;
					-- the tone is placed before the final consonant(s)
					if tone == '' then
						tone = '↓'
					end
					return initial .. vowel .. tone .. final
				end)
			end
		elseif pattern == 'BREAK' and repl == '2' then
			text = mw.ustring.lower(text)

			-- hanja readings
			-- ref. Module:Ethi-translit
			-- The pattern has two captures (hanja run, reading); only the
			-- reading is kept, so the hanja and the parentheses disappear.
			text = gsub(text, '([' .. chars_Hani .. ']+)%((.-)%)', function(_, reading)
				-- treat final ieung as null if tones are marked (is this a safe assumption?)
				if bool_tone_marking then
					reading = gsub(reading, 'ø', '')
				end
				-- convert to uppercase
				return mw.ustring.upper(reading)
			end)

			-- remove hanja reading leading period
			text = gsub(text, '^%.', '')
			-- NOTE(review): this strips *every* period, which makes the
			-- substitution after it a no-op. Behaviour kept as found; confirm
			-- against the upstream module whether a narrower pattern was
			-- intended here.
			text = gsub(text, "%.", "")
			text = gsub(text, '(%s)%.', '%1')
		else
			-- ordinary rule; "×" denotes an empty replacement
			if repl == '×' then
				repl = ''
			end
			text = gsub(text, pattern, repl)
		end
	end

	-- track failed romanizations
	-- (black diamond instead of U+FFFD to avoid warnings when saving this page)
	if mw.ustring.match(text, '◆') then
		require('Module:debug').track('okm-translit/failed romanization')
	end

	return text
end

return export