Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Recent changes
Random page
Help about MediaWiki
Neighbors
Esassani
Mobai
Glenn
Mors
Creatures
Fish
Insects
Shore-Dwellers
Search
Search
English
Appearance
Create account
Log in
Personal tools
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Editing
Module:Ko-rm
Module
Discussion
English
Read
Edit source
View history
Tools
Tools
move to sidebar
hide
Actions
Read
Edit source
View history
General
What links here
Related changes
Special pages
Page information
Appearance
move to sidebar
hide
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
-- based on Wiktionary [[wikt:Module:ko-translit]] [[wikt:Module:ko-pron]] [[wikt:Module:ko-pron/data]] [[wikt:Module:ko]]
--
-- Romanises Hangul text. Entry points:
--   p.main(frame)   template entry (arguments read through Module:Arguments)
--   p._main(args)   same, for callers that already hold an argument table
--   p.strip(str)    removes this module's inline markup from a string
local p = {}

local Ugsub = mw.ustring.gsub
local Umatch = mw.ustring.match
local Ufind = mw.ustring.find
local Usub = mw.ustring.sub
local Uchar = mw.ustring.char
local codepoint = mw.ustring.codepoint

local data = mw.loadData('Module:Ko-rm/data')
local lib = require('Module:Feature')
local CleanTT = require('Module:Tt').CleanTT

-- Irregular pronunciation corrections (IPC). Each key is both an argument name
-- and an inline marker (`<key>`); `alias` lists alternative marker spellings and
-- `offset` is added to the syllable position recorded for the marker.
local IPCvals = {
	['nn'] = {},
	['ni'] = {},
	['bcred'] = { offset = -1, alias = {'bd', 'bc'} }
}

-- Flattened view of IPCvals: every accepted marker spelling (canonical name or
-- alias) mapped to the canonical argument name and the offset it carries.
local IPCmarkers = {}
for name, def in pairs(IPCvals) do
	local offset = tonumber(def.offset) or 0
	IPCmarkers[name] = { ref = name, offset = offset }
	for _, alias in ipairs(def.alias or {}) do
		IPCmarkers[alias] = { ref = name, offset = offset }
	end
end

-- Character class used to recognise the ideograph runs that _main strips.
local HANJA = '[一-鿿㐀-䶿𠀀-𰀀-]'

--- Locate the earliest `<marker>` occurrence in `str`.
-- @param str string to scan
-- @return position of the first marker and its spelling, or nil when none is present
local function find_first_ipc(str)
	local first_pos, first_marker
	for marker in pairs(IPCmarkers) do
		local pos = Ufind(str, '<' .. marker .. '>')
		if pos ~= nil and (first_pos == nil or pos < first_pos) then
			first_pos, first_marker = pos, marker
		end
	end
	return first_pos, first_marker
end

--- Template entry point.
-- Reads the arguments via Module:Arguments (parent frame first, with
-- Template:Ko-rm registered as wrapper) and hands them to p._main.
-- @param frame Scribunto frame object
-- @return romanised text, or '' when the first positional argument is missing
function p.main(frame)
	local args = require('Module:Arguments').getArgs(frame, {
		parentFirst = true,
		wrappers = {'Template:Ko-rm'}
	})
	if (not args[1]) then return '' end
	return p._main(args)
end

--- Romanise args[1] and post-process capitalisation and punctuation.
-- Inline IPC markers found in the text are removed and appended to the
-- matching entry of `args` (args.nn / args.ni / args.bcred) as syllable
-- positions, so `args` is modified in place.
-- @param args argument table; args[1] is the text, args.capi / args.sent
--   request capitalisation of the result
-- @return romanised string; '' when the text contains no precomposed Hangul
--   syllable or when p.romanize gives up
function p._main(args)
	local str = CleanTT(mw.text.unstrip(args[1]))
	-- mw.logObject('input' .. args[1]) --debug
	-- mw.logObject('cleaned' .. str) --debug

	-- drop parenthesised ideograph glosses; keep the parenthesised reading
	-- when the ideographs come first
	str = Ugsub(str, '%(' .. HANJA .. '+%)', '')
	str = Ugsub(str, '%(' .. HANJA .. "*'''" .. HANJA .. "+'''" .. HANJA .. '*%)', '')
	str = Ugsub(str, HANJA .. '+%((.-)%)', '%1')
	str = Ugsub(str, '<sup.-※.-</sup>', '')
	str = Ugsub(str, '<span.-title.->(.-)</span>', '%1')
	str = Ugsub(str, '<[%w%p]+:(.-)>', '%1') --for manual readings
	str = Ugsub(str, '<%->', '-') --for manual hyphenation
	str = Ugsub(str, '< >', ' ') --for manual spacing

	-- manual inserting of irregular pronunciation corrections (IPC):
	-- consume the markers left to right, earliest first
	while true do
		local pos, marker = find_first_ipc(str)
		if pos == nil then break end
		local info = IPCmarkers[marker]
		str = Ugsub(str, '<' .. marker .. '>', '', 1)
		-- position = number of Hangul syllables and spaces up to `pos`
		pos = mw.ustring.len((Ugsub((Usub(str, 0, pos)), '[^가-힣 ]', '')))
		args[info.ref] = (args[info.ref] or '-1') .. ',' .. tostring(pos + info.offset)
	end

	if not Umatch(str, '[가-힣]') then return '' end

	--pronunciation exception(s)
	str = Ugsub(str, '여덟', '여덜')
	str = Ugsub(str, 'Ⅰ', '일')
	str = Ugsub(str, 'Ⅱ', '이')
	str = Ugsub(str, 'Ⅲ', '삼')
	str = Ugsub(str, 'Ⅳ', '사')
	str = Ugsub(str, 'Ⅴ', '오')
	str = Ugsub(str, 'Ⅵ', '육')
	str = Ugsub(str, 'Ⅶ', '칠')
	str = Ugsub(str, 'Ⅷ', '팔')
	str = Ugsub(str, 'Ⅸ', '구')
	str = Ugsub(str, 'Ⅹ', '십')

	-- pre-romanization punctuation conversion
	str = Ugsub(str, '[《「『【]', '“')
	str = Ugsub(str, '[》」』】]', '”')

	local revised = p.romanize(str, args)
	if (not revised) then return '' end
	--mw.logObject(revised,'revised') --debug

	-- sentence punctuation present: capitalise the first character and mark
	-- the letter after each sentence end with '^' for upper-casing below
	if Umatch(revised, '[%.%?%!]') then
		revised = mw.ustring.upper(Usub(revised, 1, 1)) .. Usub(revised, 2, -1)
		revised = Ugsub(revised, "([%.%?%!]) ([a-z%'])", '%1 ^%2')
		revised = Ugsub(revised, "^%'%'%'", "'''^")
	end
	revised = Ugsub(revised, "([a-z])%-%'([a-z])", '%1-%2')
	revised = Ugsub(revised, "%^%'%'%'", "'''^")
	revised = Ugsub(revised, '%^%l', mw.ustring.upper)
	revised = Ugsub(revised, '%^', '')
	revised = Ugsub(revised, "%-'''%-", "'''-")
	revised = Ugsub(revised, '%-%-', '-')

	--punctuation fixing
	revised = Ugsub(revised, '…', '...')
	-- NOTE(review): as written the next two substitutions replace a character
	-- with itself, and the '——' substitution further down appears twice;
	-- presumably the patterns were meant to be different (e.g. full-width)
	-- characters. Confirm against the intended source.
	revised = Ugsub(revised, '!', '!')
	revised = Ugsub(revised, '?', '?')
	revised = Ugsub(revised, '”([A-Za-z])', '”-%1')
	revised = Ugsub(revised, '(//[^/@]-@@[^/@]-@@//)%-?([A-Za-z])', '%1-%2')
	revised = Ugsub(revised, '[·・]', ' - ')
	revised = Ugsub(revised, '——', '⸺')
	revised = Ugsub(revised, '——', '⸺')

	--secondary romanisation system: resolve each //...// section; a section of
	--the form //A@@B@@// shows A and, when B differs, B in a tooltip
	while revised:find('^.-//[^/]-//.-$') do
		local pre, dur, dur3, post = string.match(revised, '^(.-)//([^@/]-)@@([^@/]-)@@//(.-)$')
		if dur3 ~= nil then
			--mw.logObject(pre,'pre') mw.logObject(dur,'dur') mw.logObject(post,'post') --debug
			if mw.ustring.lower(dur3) ~= mw.ustring.lower(dur) then
				-- escape double quotes so the text cannot end the title attribute
				local spelled = p.capitalizer(dur3, true):gsub('"', '&quot;')
				dur = '<span style="border-bottom-width:1px; border-bottom-style:dotted; border-bottom-color:rgb(128, 128, 128); cursor:help;" title="Spelled: '
					.. spelled .. '">' .. p.capitalizer(dur, true) .. '</span>'
			else
				dur = p.capitalizer(dur, true)
			end
		else
			pre, dur, post = string.match(revised, '^(.-)//([^/]-)//(.-)$')
		end
		revised = pre .. dur .. post
	end

	--all case (|capi=1) or sentence case (|sent=1)
	if (args.capi or args.sent) then
		revised = p.capitalizer(revised, (args.capi or nil))
	end

	--post-capitalization punctuation fixing
	revised = Ugsub(revised, "”'", '”-')
	revised = Ugsub(revised, '[“”]', '"')
	revised = Ugsub(revised, '([%a])(%d+)', '%1-%2')
	revised = Ugsub(revised, '(%d+)([%a])', '%1-%2')
	revised = Ugsub(revised, '(%d+)-[Pp]x', '%1px') --lazy fix for accidental hyphenation of pixel amounts
	return revised
end

--- Replace every Hangul/jamo run in `text_param` with its romanisation.
-- @param text_param text to romanise
-- @param args optional table; args.nn, args.ni and args.bcred are
--   comma-separated lists of syllable positions at which the corresponding
--   correction is applied
-- @return the text with each run replaced, or nil (after logging) when
--   Module:Ko-rm/data has no boundary entry for a final/initial pair
function p.romanize(text_param, args)
	args = args or {}
	local P, optional_params = {}, { 'nn', 'ni', 'bcred' }
	for _, pm in ipairs(optional_params) do
		P[pm] = { }
		if args[pm] then
			for pp in lib.gsplit(args[pm], ',', {removeEmpty=true}) do
				P[pm][tonumber(pp) or pp] = 1
			end
		end
	end
	--mw.logObject(P,'P') --debug

	-- T_index counts processed characters across all runs; it is the position
	-- that the nn / ni / bcred lists refer to
	local T_index, T_next_index = 0,0
	-- true while inside a //...// (secondary romanisation) section
	local rom3 = false

	text_param = Ugsub(text_param, '["](.)', '%1')
	for primitive_word in mw.ustring.gmatch(text_param, '[%-ᄀ-ᄒ' .. 'ᅡ-ᅵ' .. 'ᆨ-ᇂ' .. "ㄱ-ㅣ가-힣' /「」%^]+") do
		--mw.logObject(primitive_word,'primitive_word') --debug
		--mw.logObject(text_param,'text_param') --debug
		local the_original = primitive_word

		-- remember where the ''' bold markers sit, then remove them
		primitive_word = Ugsub(primitive_word, "'''", 'ß')
		local bold_position, bold_count = {}, 0
		while Umatch(primitive_word, 'ß') do
			bold_position[(mw.ustring.find(primitive_word, 'ß')) + bold_count] = true
			primitive_word = Ugsub(primitive_word, 'ß', '', 1)
			bold_count = bold_count + 1
		end

		local word_set = { primitive_word }
		local word_set_romanisations = {}
		for _, respelling in ipairs(word_set) do
			--mw.logObject(word_set,'word_set') --debug
			--mw.logObject(respelling,'respelling') --debug
			local decomposed_syllables = p.decompose_syllable(respelling)
			--mw.logObject(decomposed_syllables,'decomposed_syllables') --debug
			local romanisation = {}
			local romanisation3 = {}
			local bold_insert_count = 0

			-- index 0 is a virtual empty syllable, so the junction in front of
			-- the first real syllable is emitted as well
			for index = 0, #decomposed_syllables, 1 do
				if index ~= 0 then T_index = T_index + 1 end
				local this_syllable_text = index ~= 0 and Usub(respelling, index, index) or ''
				local forced = ''
				--mw.logObject(this_syllable_text,'this_syllable_text_I') --debug

				-- decompose_syllable skips / 「 」 ^, so collect them into
				-- `forced` and shorten `respelling` by one character each time
				-- to keep its positions aligned with decomposed_syllables
				while Umatch(this_syllable_text, '[/「」^]') do
					forced = forced .. this_syllable_text
					respelling = Usub(respelling, 2, -1)
					this_syllable_text = index ~= 0 and Usub(respelling, index, index) or ''
				end
				--mw.logObject(forced,'forced') --debug

				-- every '//' toggles the secondary romanisation section
				if (forced:find('//') and (not rom3)) then
					rom3 = true
				elseif forced:find('//') then
					rom3 = false
				end
				--mw.logObject(this_syllable_text,'this_syllable_text_F') --debug

				if this_syllable_text == '-' then
					if ((not rom3) and #romanisation3 > 0) then
						table.remove(romanisation3)
						table.remove(romanisation3)
						table.insert(romanisation, #romanisation-(#romanisation>0 and 1 or 0), '@@' .. Ugsub(table.concat(romanisation3), "[^A-Za-z\"]$", '') .. '@@')
						romanisation3 = {}
						table.insert(romanisation, #romanisation-(#romanisation>0 and 1 or 0), forced)
					end
				else
					T_next_index = T_index
					local syllable = decomposed_syllables[index] or { initial = 'Ø', vowel = 'Ø', final = 'X' }

					-- find the next character that is not a hyphen
					local next_index = index
					local next_syllable_text
					local saw_hyphen_after = false
					while true do
						next_index = next_index + 1
						T_next_index = T_next_index + 1
						next_syllable_text = next_index > #decomposed_syllables and '' or Usub(respelling, next_index, next_index)
						if next_syllable_text ~= '-' then break end
						saw_hyphen_after = true
					end
					local next_syllable = decomposed_syllables[next_index] or { initial = 'Ø', vowel = 'Ø', final = 'Ø' }

					syllable.final = data.FSC[syllable.final] or syllable.final

					if this_syllable_text == '넓' then
						if Umatch(next_syllable.initial, '[ᄌᄉ]') then
							syllable.final = 'ᆸ'
						elseif next_syllable.initial == 'ᄃ' then
							if Umatch(next_syllable.vowel, '[^ᅡᅵ]') then
								syllable.final = 'ᆸ'
							end
						end
					end

					local vowel = data.vowels[syllable.vowel][2]

					-- corrections requested through args / inline markers
					if P.nn[T_next_index] and Umatch(syllable.final .. next_syllable.initial, 'ᆫᄅ') then
						next_syllable.initial = 'ᄂ'
					end
					if P.ni[T_next_index] and next_syllable.initial == 'ᄋ' and Umatch(next_syllable.vowel, '[ᅵᅣᅧᅭᅲ]') then
						next_syllable.initial = 'ᄂ'
					end
					if P.bcred[T_index] then
						syllable.final = data.boundary[syllable.final .. '-Ø'][1]
					end

					if index ~= 0 and this_syllable_text == '밟' and not Umatch(next_syllable.initial, '[ᄋᄒ]') then
						syllable.final = 'ᆸ'
					end
					if Umatch(this_syllable_text, '[닭뷁삵슭앍줅찱칡탉흙]') and not Umatch(next_syllable.initial .. ';' .. next_syllable.vowel, 'ᄋ;[ᅦᅧᅳᅴᅵ]') then
						syllable.final = 'ᆨ'
					end

					-- before 없 the final is reduced to a single consonant
					if next_syllable_text == '없' then
						if Umatch(syllable.final, '[ᆩᆪᆰᆿ]') then
							syllable.final = 'ᆨ'
						elseif Umatch(syllable.final, '[ᆬᆭ]') then
							syllable.final = 'ᆫ'
						elseif Umatch(syllable.final, '[ᆺᆻᆽᆾᇀ]') then
							syllable.final = 'ᆮ'
						elseif Umatch(syllable.final, '[ᆲᆳᆴᆶ]') then
							syllable.final = 'ᆯ'
						elseif syllable.final == 'ᆱ' then
							syllable.final = 'ᆷ'
						elseif Umatch(syllable.final, '[ᆵᆹᇁ]') then
							syllable.final = 'ᆸ'
						end
					end

					if (not P.bcred[T_index]) then
						if Umatch(syllable.final .. next_syllable.initial, 'ᇀᄋ') then
							if Umatch(next_syllable.vowel, '[ᅵᅧ]') then
								syllable.final = 'ᆾ'
							end
						elseif Umatch(syllable.final .. next_syllable.initial, 'ᆴᄋ') then
							if Umatch(next_syllable.vowel, '[ᅵᅧ]') then
								syllable.final = 'ᆯ'
								next_syllable.initial = 'ᄎ'
							end
						-- this branch used to be additionally guarded by an
						-- undeclared global (`s_variation`) that was always nil,
						-- so the guard always passed; it has been dropped
						elseif Umatch(syllable.final .. next_syllable.initial, 'ᆮᄋ') then
							if Umatch(next_syllable.vowel, '[ᅵᅧ]') then
								syllable.final = 'ᆽ'
							end
						elseif Umatch(syllable.final .. next_syllable.initial, 'ᆮᄒ') then
							if Umatch(next_syllable.vowel, '[ᅵᅧ]') then
								syllable.final = 'ᆾ'
								next_syllable.initial = 'ᄋ'
							end
						end
					end

					if syllable.final .. next_syllable.initial == 'ᆺᄋ' and not Umatch(next_syllable_text, '[아았어었에으은을음읍의이인일임입있]') then
						syllable.final = 'ᆮ'
					end

					local bound = syllable.final .. '-' .. next_syllable.initial
					if (not data.boundary[bound]) then
						mw.log('No boundary data for ' .. bound .. '.')
						return nil
					end
					-- boundary entries read here: [2] primary romanisation,
					-- [3] secondary romanisation (falls back to [2]); both are
					-- '<final>;<initial>' strings
					local junction = data.boundary[bound][2]
					local junction3 = data.boundary[bound][3] or data.boundary[bound][2]
					--mw.logObject(junction, 'junction') --debug
					--mw.logObject(junction3, 'junction3') --debug

					-- re-insert a ''' marker recorded for this position
					if bold_position[index + bold_insert_count + 1] then
						junction = Ugsub(junction, '^.*$', function(matched)
							local a, b = string.match(matched, '^(ng);(.*)$')
							if ((not a) and (not b)) then
								a, b = string.match(matched, '^(.?%-?);(.*)$')
							end
							return Umatch(syllable.final .. next_syllable.initial, '^Ø?[ᄀ-ᄒ]$') and "'''" .. (a or '') .. ';' .. (b or '') or (a or '') .. "'''" .. ';' .. (b or '')
						end)
						bold_insert_count = bold_insert_count + 1
					end
					local final_cons, initial_cons = Umatch(junction, '^(.*);(.*)$')

					--special romanisation
					if rom3 then
						if (#romanisation3 == 0 and #romanisation > 0) then
							table.insert(romanisation3, romanisation[#romanisation])
						end
						local final_cons3, initial_cons3 = Umatch(junction3, '^(.*);(.*)$')
						table.insert(romanisation3, vowel)
						table.insert(romanisation3, final_cons3)
						table.insert(romanisation3, (saw_hyphen_after and '-' or ''))
						table.insert(romanisation3, initial_cons3)
					elseif ((not rom3) and #romanisation3 > 0) then
						table.remove(romanisation3)
						table.remove(romanisation3)
						table.insert(romanisation, #romanisation-(#romanisation>0 and 1 or 0), '@@' .. Ugsub(table.concat(romanisation3), "[^A-Za-z\"]$", '') .. '@@')
						romanisation3 = {}
					end

					table.insert(romanisation, #romanisation-(#romanisation>0 and 1 or 0), forced)
					table.insert(romanisation, vowel)
					table.insert(romanisation, final_cons)
					table.insert(romanisation, (saw_hyphen_after and '-' or ''))
					table.insert(romanisation, initial_cons)

					--straggler characters at end of word set
					if index == #decomposed_syllables and lib.isNotEmpty(Usub(respelling, index+1, index+1)) then
						local N = Usub(respelling, index+1, #respelling)
						if (N:find('//') and #romanisation3 > 0) then
							table.remove(romanisation3)
							table.remove(romanisation3)
							table.insert(romanisation, #romanisation-(#romanisation>0 and 1 or 0), '@@' .. Ugsub(table.concat(romanisation3), "[^A-Za-z\"]$", '') .. '@@')
							romanisation3 = {}
							table.insert(romanisation, N)
						else
							romanisation3 = {}
						end
					end
					--[[
					local currRom = {
						syllable = syllable,
						vowel = vowel,
						final_cons = final_cons,
						initial_cons = initial_cons,
						totalRom = table.concat(romanisation),
						totalRom3 = table.concat(romanisation3)
					} --debug
					mw.logObject(currRom,'currRom') --debug
					--]]
				end
			end

			local temp_romanisation = table.concat(romanisation)
			--mw.logObject(temp_romanisation,'temp_romanisation') --debug
			-- two passes: turn each '…' between two characters into an
			-- apostrophe when data.AI lists the pair, else drop it
			for _ = 1, 2 do
				temp_romanisation = Ugsub(temp_romanisation, '(.)…(.)', function(a, b)
					return a .. (data.AI[a .. b] and "'" or '') .. b
				end)
				temp_romanisation = Ugsub(temp_romanisation, "wo'e", 'woe')
				temp_romanisation = Ugsub(temp_romanisation, "yo'e", 'yoe')
				temp_romanisation = Ugsub(temp_romanisation, "we'o", 'weo')
				temp_romanisation = Ugsub(temp_romanisation, "we'u", 'weu')
				temp_romanisation = Ugsub(temp_romanisation, "ye'u", 'yeu')
				temp_romanisation = Ugsub(temp_romanisation, "yu'i", 'yui')
			end
			table.insert(word_set_romanisations, temp_romanisation)
		end

		-- swap the first occurrence of the original run for its romanisation
		text_param = Ugsub(
			text_param,
			p.pattern_escape(the_original),
			table.concat(word_set_romanisations, '/'),
			1
		)
	end
	return text_param
end

--- Split one character into initial / vowel / final jamo.
-- Precomposed syllables (가-힣) are decomposed arithmetically from their
-- codepoint. A lone jamo fills the slot its block belongs to, with 'Ø' in the
-- other slots. Anything else yields the placeholder
-- { initial = 'Ø', vowel = ' ', final = 'X' }.
-- @param syllable a single character
-- @return table with fields `initial`, `vowel`, `final`
function p.decompose_jamo(syllable)
	if (not Umatch(syllable, '[가-힣]')) then
		if Umatch(syllable, '[ᄀ-ᄒ]') then
			return { initial = syllable, vowel = 'Ø', final = 'Ø' }
		elseif Umatch(syllable, '[ᅡ-ᅵ]') then
			return { initial = 'Ø', vowel = syllable, final = 'Ø' }
		elseif Umatch(syllable, '[ᆨ-ᇂ]') then
			return { initial = 'Ø', vowel = 'Ø', final = syllable }
		elseif Umatch(syllable, '[ㄱ-ㆎ]') then
			return { initial = 'Ø', vowel = 'Ø', final = syllable }
		else
			return { initial = 'Ø', vowel = ' ', final = 'X' }
		end
	end

	local cp = codepoint(syllable)
	-- same record shape as every other return, so callers can index the
	-- named fields unconditionally
	if (not cp) then return { initial = 'Ø', vowel = ' ', final = 'X' } end

	-- syllable = 0xAC00 + (initial * 21 + vowel) * 28 + final, 21 * 28 = 588
	local relative_cp = cp - 0xAC00
	local jongseong = (((relative_cp % 28) ~= 0) and Uchar(0x11A7 + (relative_cp % 28))) or ''
	local jungseong = Uchar(0x1161 + math.floor((relative_cp % 588) / 28))
	local choseong = Uchar(0x1100 + math.floor(relative_cp / 588))
	return { initial = choseong, vowel = jungseong, final = jongseong }
end

--- Escape Lua pattern magic characters so the text matches literally.
-- @param text a string, or a frame-like table whose args[1] is the string
-- @return the escaped string
function p.pattern_escape(text)
	if type(text) == 'table' then text = text.args[1] end
	text = Ugsub(text, '([%^$()%%.%[%]*+%-?])', '%%%1')
	return text
end

--- Decompose every character of `word` with p.decompose_jamo.
-- The control characters / 「 」 ^ are skipped and produce no entry.
-- @param word string to split character by character
-- @return array of jamo records, in order
function p.decompose_syllable(word)
	local decomposed_syllables = {}
	for syllable in mw.text.gsplit(word, '') do
		--mw.logObject(syllable,'syllable') --debug
		if not Umatch(syllable, '[/「」%^]') then
			table.insert(decomposed_syllables, p.decompose_jamo(syllable))
		end
	end
	return decomposed_syllables
end

--- Upper-case the first letter of each segment of `str`.
-- A segment starts at the beginning of the string and after every character
-- outside the "word" set. With `all` set, a space or comma also starts a new
-- segment; without it they do not, except that a space seen while a capital
-- is still pending keeps it pending. An underscore directly in front of a
-- would-be capital suppresses that capital and is removed.
-- @param str text to capitalise; empty values are returned unchanged
-- @param all any non-nil value selects the `all` behaviour
-- @return the capitalised string
function p.capitalizer(str, all)
	if lib.isNotEmpty(str) then
		str = mw.text.split(str,'')
		--mw.logObject(str,'str') --debug

		-- characters NOT matched by this class keep the current segment going
		local breaker = (all ~= nil) and '[^A-Za-z%-"_#&]' or '[^A-Za-z%-"_,%s#&]'
		local cap = true
		for index = 1,#str do
			local ch = str[index]
			if (ch:find(breaker) and ch ~= "'") or (cap and ch == ' ') then
				cap = true
				--mw.logObject(str[index],'skipped') --debug
			elseif cap and ch == '_' then
				cap = false
				str[index] = ''
			elseif cap then
				str[index] = mw.ustring.upper(ch)
				--mw.logObject(str[index],'capped') --debug
				cap = false
			end
		end
		str = table.concat(str,'')
	end
	return str
end

--- Remove this module's inline markup from `str`.
-- @param str text that may contain //…//, ^, <x>, IPC markers, _ and
--   <text:reading> constructs
-- @return the cleaned text; '' for empty input
function p.strip(str)
	if lib.isEmpty(str) then return '' end
	str = Ugsub(str, '//(.-)//', '%1' ) --remove given name specifier
	str = Ugsub(str, '%^', '' ) --remove capitalization marker
	str = Ugsub(str, '<.>', '' ) --remove arbitrary separator
	str = Ugsub(str, '<(%w+)>', --remove manual IPC markers
		function(term)
			if IPCmarkers[term] then return '' end
			return nil -- not an IPC marker: leave the match untouched
		end)
	str = Ugsub(str, '_', '' ) --remove capitalization blacklister
	str = Ugsub(str, '<([%w%p]+):.->', '%1' ) --reduce manual readings to just the text
	return str
end

return p
Summary:
Please note that all contributions to The Petit Planet Wiki are considered to be released under the Creative Commons Attribution-NonCommercial-ShareAlike (see
Petit Planet:Copyrights
for details). If you do not want your writing to be edited mercilessly and redistributed at will, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource.
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)
Preview page with this template
Below are commonly used wikitext from
MediaWiki:Edittools
. Click on any to insert them in the edit box.
File Pages
==Summary==
==Licensing==
{{Fairuse}}
{{File|}}
Dynamic Page List
¦
²
«
»
²{¦}²
«»
«/»
«!----»
Array in DPL
¹
^2{
}^2
^2{¹}^2
Parser Functions
{{#if:||}}
{{#if:<condition>|<result if true>|<result if false>}}
{{#ifeq:|||}}
{{#ifeq:<text1>|<text2>|<result if text1=text2>|<result if text1≠text2>}}
{{#expr:}}
{{#expr:<mathematical expression>}}
{{#switch:||#default=}}
{{#switch:<text1>|<text2>=<result if text1=text2>|<text3>=<result if text1=text3>|#default=<result if no valid match>}}
{{#replace:||}}
{{#replace:<text1>|<plain text to find in text1>|<plain text to insert in place of the text found>}}
{{#titleparts:}}
{{#titleparts:<page name>}}
{{#tag:|}}
{{#tag:<name of html tag>|<content inside html tag>}}
Markup
{{}}
|
[]
[[]]
[[Category:]]
#REDIRECT [[]] [[Category:Redirect Pages]]
<code></code>
<includeonly></includeonly>
<noinclude></noinclude>
<nowiki></nowiki>
<!---->
<br>
§
Notices
{{Stub|}}
{{Stub Dialogue}}
{{Upcoming}}
{{Under Construction}}
{{Placeholder|}}
Magic Words
{{!}}
{{formatnum:}}
{{lc:}}
{{uc:}}
{{PAGENAME}}
{{FULLPAGENAME}}
{{ROOTPAGENAME}}
{{BASEPAGENAME}}
{{SUBPAGENAME}}
{{DISPLAYTITLE:|noreplace}}
__TOC__
__NOTOC__
__EXPECTUNUSEDCATEGORY__
__HIDDENCAT__
HTML Entities
—
,
­
Template used on this page:
Module:Ko-rm/doc
(
edit
)