Toggle menu
7
27
38
5.2K
Sanarchive
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

Documentation for this module may be created at Module:Lang/data/is latn data/doc

--[[--------------------------< S I N G L E S _ T >-----------------------------------------------------------

Set of isolated Latn and Zyyy (common) codepoints, i.e. those that are not included in <ranges_t>.  As noted by
the original author, the list is taken from Module:Unicode data/scripts and a local copy of
https://www.unicode.org/Public/16.0.0/ucd/ScriptExtensions.txt

Keys are codepoint numbers, written here as hexadecimal literals so that they read like the usual U+XXXX
notation; the trailing comment on each line gives the same value in decimal.  Every value is <true>.

]]

local singles_t = {
	[0x00AA] = true,	-- 170
	[0x00BA] = true,	-- 186
	[0x00D7] = true,	-- 215
	[0x00F7] = true,	-- 247
	[0x0313] = true,	-- 787
	[0x0320] = true,	-- 800
	[0x0358] = true,	-- 856
	[0x035E] = true,	-- 862
	[0x0374] = true,	-- 884
	[0x037E] = true,	-- 894
	[0x0385] = true,	-- 901
	[0x0387] = true,	-- 903
	[0x0605] = true,	-- 1541
	[0x060C] = true,	-- 1548
	[0x061B] = true,	-- 1563
	[0x061F] = true,	-- 1567
	[0x0640] = true,	-- 1600
	[0x06DD] = true,	-- 1757
	[0x08E2] = true,	-- 2274
	[0x0E3F] = true,	-- 3647
	[0x10FB] = true,	-- 4347
	[0x1805] = true,	-- 6149
	[0x1CD3] = true,	-- 7379
	[0x1CE1] = true,	-- 7393
	[0x1CFA] = true,	-- 7418
	[0x1DF8] = true,	-- 7672
	[0x2071] = true,	-- 8305
	[0x207F] = true,	-- 8319
	[0x20F0] = true,	-- 8432
	[0x2132] = true,	-- 8498
	[0x214E] = true,	-- 8526
	[0x3006] = true,	-- 12294
	[0x30A0] = true,	-- 12448
	[0x31EF] = true,	-- 12783
	[0x32FF] = true,	-- 13055
	[0xA7D3] = true,	-- 42963
	[0xA92E] = true,	-- 43310
	[0xA9CF] = true,	-- 43471
	[0xAB5B] = true,	-- 43867
	[0xFEFF] = true,	-- 65279
	[0xFF70] = true,	-- 65392
	[0x1D4A2] = true,	-- 119970
	[0x1D4BB] = true,	-- 119995
	[0x1D546] = true,	-- 120134
	[0x1F7F0] = true,	-- 129008
	[0xE0001] = true,	-- 917505
	}


--[[--------------------------< R A N G E S _ T >-------------------------------------------------------------

Sequence of contiguous runs of Latn and Zyyy (common) codepoints.  As noted by the original author, the list
is taken from Module:Unicode data/scripts and a local copy of
https://www.unicode.org/Public/16.0.0/ucd/ScriptExtensions.txt

Each entry is a two-element sequence {first, last} holding the first and last codepoint of one run.  The bounds
are written as hexadecimal literals so that they read like the usual U+XXXX notation; the trailing comment on
each line gives the same run as first..last in decimal.  Entries appear in ascending order and do not overlap.

]]

local ranges_t = {
	{0x0000, 0x00A9},	-- 0..169
	{0x00AB, 0x00B9},	-- 171..185
	{0x00BB, 0x00D6},	-- 187..214
	{0x00D8, 0x00F6},	-- 216..246
	{0x00F8, 0x02E9},	-- 248..745
	{0x02EC, 0x030E},	-- 748..782
	{0x0310, 0x0311},	-- 784..785
	{0x0323, 0x0325},	-- 803..805
	{0x032D, 0x032E},	-- 813..814
	{0x0330, 0x0331},	-- 816..817
	{0x0363, 0x036F},	-- 867..879
	{0x0485, 0x0486},	-- 1157..1158
	{0x0951, 0x0952},	-- 2385..2386
	{0x0964, 0x0965},	-- 2404..2405
	{0x0FD5, 0x0FD8},	-- 4053..4056
	{0x16EB, 0x16ED},	-- 5867..5869
	{0x1735, 0x1736},	-- 5941..5942
	{0x1802, 0x1803},	-- 6146..6147
	{0x1CE9, 0x1CEC},	-- 7401..7404
	{0x1CEE, 0x1CF3},	-- 7406..7411
	{0x1CF5, 0x1CF7},	-- 7413..7415
	{0x1D00, 0x1D25},	-- 7424..7461
	{0x1D2C, 0x1D5C},	-- 7468..7516
	{0x1D62, 0x1D65},	-- 7522..7525
	{0x1D6B, 0x1D77},	-- 7531..7543
	{0x1D79, 0x1DBE},	-- 7545..7614
	{0x1E00, 0x1EFF},	-- 7680..7935
	{0x2000, 0x200B},	-- 8192..8203
	{0x200E, 0x2064},	-- 8206..8292
	{0x2066, 0x2070},	-- 8294..8304
	{0x2074, 0x207E},	-- 8308..8318
	{0x2080, 0x208E},	-- 8320..8334
	{0x2090, 0x209C},	-- 8336..8348
	{0x20A0, 0x20C0},	-- 8352..8384
	{0x2100, 0x2125},	-- 8448..8485
	{0x2127, 0x2131},	-- 8487..8497
	{0x2133, 0x214D},	-- 8499..8525
	{0x214F, 0x218B},	-- 8527..8587
	{0x2190, 0x2429},	-- 8592..9257
	{0x2440, 0x244A},	-- 9280..9290
	{0x2460, 0x27FF},	-- 9312..10239
	{0x2900, 0x2B73},	-- 10496..11123
	{0x2B76, 0x2B95},	-- 11126..11157
	{0x2B97, 0x2BFF},	-- 11159..11263
	{0x2C60, 0x2C7F},	-- 11360..11391
	{0x2E00, 0x2E5D},	-- 11776..11869
	{0x2FF0, 0x3004},	-- 12272..12292
	{0x3008, 0x3020},	-- 12296..12320
	{0x3030, 0x3037},	-- 12336..12343
	{0x303C, 0x303F},	-- 12348..12351
	{0x309B, 0x309C},	-- 12443..12444
	{0x30FB, 0x30FC},	-- 12539..12540
	{0x3190, 0x319F},	-- 12688..12703
	{0x31C0, 0x31E5},	-- 12736..12773
	{0x3220, 0x325F},	-- 12832..12895
	{0x327F, 0x32CF},	-- 12927..13007
	{0x3358, 0x33FF},	-- 13144..13311
	{0x4DC0, 0x4DFF},	-- 19904..19967
	{0xA700, 0xA7CD},	-- 42752..42957
	{0xA7D0, 0xA7D1},	-- 42960..42961
	{0xA7D5, 0xA7DC},	-- 42965..42972
	{0xA7F2, 0xA7FF},	-- 42994..43007
	{0xA830, 0xA839},	-- 43056..43065
	{0xAB30, 0xAB5A},	-- 43824..43866
	{0xAB5C, 0xAB64},	-- 43868..43876
	{0xAB66, 0xAB6B},	-- 43878..43883
	{0xFB00, 0xFB06},	-- 64256..64262
	{0xFD3E, 0xFD3F},	-- 64830..64831
	{0xFE10, 0xFE19},	-- 65040..65049
	{0xFE30, 0xFE52},	-- 65072..65106
	{0xFE54, 0xFE66},	-- 65108..65126
	{0xFE68, 0xFE6B},	-- 65128..65131
	{0xFF01, 0xFF65},	-- 65281..65381
	{0xFF9E, 0xFF9F},	-- 65438..65439
	{0xFFE0, 0xFFE6},	-- 65504..65510
	{0xFFE8, 0xFFEE},	-- 65512..65518
	{0xFFF9, 0xFFFD},	-- 65529..65533
	{0x10100, 0x10102},	-- 65792..65794
	{0x10107, 0x10133},	-- 65799..65843
	{0x10137, 0x1013F},	-- 65847..65855
	{0x10190, 0x1019C},	-- 65936..65948
	{0x101D0, 0x101FC},	-- 66000..66044
	{0x102E1, 0x102FB},	-- 66273..66299
	{0x10780, 0x10785},	-- 67456..67461
	{0x10787, 0x107B0},	-- 67463..67504
	{0x107B2, 0x107BA},	-- 67506..67514
	{0x1BCA0, 0x1BCA3},	-- 113824..113827
	{0x1CC00, 0x1CCF9},	-- 117760..118009
	{0x1CD00, 0x1CEB3},	-- 118016..118451
	{0x1CF50, 0x1CFC3},	-- 118608..118723
	{0x1D000, 0x1D0F5},	-- 118784..119029
	{0x1D100, 0x1D126},	-- 119040..119078
	{0x1D129, 0x1D166},	-- 119081..119142
	{0x1D16A, 0x1D17A},	-- 119146..119162
	{0x1D183, 0x1D184},	-- 119171..119172
	{0x1D18C, 0x1D1A9},	-- 119180..119209
	{0x1D1AE, 0x1D1EA},	-- 119214..119274
	{0x1D2C0, 0x1D2D3},	-- 119488..119507
	{0x1D2E0, 0x1D2F3},	-- 119520..119539
	{0x1D300, 0x1D356},	-- 119552..119638
	{0x1D360, 0x1D378},	-- 119648..119672
	{0x1D400, 0x1D454},	-- 119808..119892
	{0x1D456, 0x1D49C},	-- 119894..119964
	{0x1D49E, 0x1D49F},	-- 119966..119967
	{0x1D4A5, 0x1D4A6},	-- 119973..119974
	{0x1D4A9, 0x1D4AC},	-- 119977..119980
	{0x1D4AE, 0x1D4B9},	-- 119982..119993
	{0x1D4BD, 0x1D4C3},	-- 119997..120003
	{0x1D4C5, 0x1D505},	-- 120005..120069
	{0x1D507, 0x1D50A},	-- 120071..120074
	{0x1D50D, 0x1D514},	-- 120077..120084
	{0x1D516, 0x1D51C},	-- 120086..120092
	{0x1D51E, 0x1D539},	-- 120094..120121
	{0x1D53B, 0x1D53E},	-- 120123..120126
	{0x1D540, 0x1D544},	-- 120128..120132
	{0x1D54A, 0x1D550},	-- 120138..120144
	{0x1D552, 0x1D6A5},	-- 120146..120485
	{0x1D6A8, 0x1D7CB},	-- 120488..120779
	{0x1D7CE, 0x1D7FF},	-- 120782..120831
	{0x1DF00, 0x1DF1E},	-- 122624..122654
	{0x1DF25, 0x1DF2A},	-- 122661..122666
	{0x1EC71, 0x1ECB4},	-- 126065..126132
	{0x1ED01, 0x1ED3D},	-- 126209..126269
	{0x1F000, 0x1F02B},	-- 126976..127019
	{0x1F030, 0x1F093},	-- 127024..127123
	{0x1F0A0, 0x1F0AE},	-- 127136..127150
	{0x1F0B1, 0x1F0BF},	-- 127153..127167
	{0x1F0C1, 0x1F0CF},	-- 127169..127183
	{0x1F0D1, 0x1F0F5},	-- 127185..127221
	{0x1F100, 0x1F1AD},	-- 127232..127405
	{0x1F1E6, 0x1F1FF},	-- 127462..127487
	{0x1F201, 0x1F202},	-- 127489..127490
	{0x1F210, 0x1F23B},	-- 127504..127547
	{0x1F240, 0x1F248},	-- 127552..127560
	{0x1F250, 0x1F251},	-- 127568..127569
	{0x1F260, 0x1F265},	-- 127584..127589
	{0x1F300, 0x1F6D7},	-- 127744..128727
	{0x1F6DC, 0x1F6EC},	-- 128732..128748
	{0x1F6F0, 0x1F6FC},	-- 128752..128764
	{0x1F700, 0x1F776},	-- 128768..128886
	{0x1F77B, 0x1F7D9},	-- 128891..128985
	{0x1F7E0, 0x1F7EB},	-- 128992..129003
	{0x1F800, 0x1F80B},	-- 129024..129035
	{0x1F810, 0x1F847},	-- 129040..129095
	{0x1F850, 0x1F859},	-- 129104..129113
	{0x1F860, 0x1F887},	-- 129120..129159
	{0x1F890, 0x1F8AD},	-- 129168..129197
	{0x1F8B0, 0x1F8BB},	-- 129200..129211
	{0x1F8C0, 0x1F8C1},	-- 129216..129217
	{0x1F900, 0x1FA53},	-- 129280..129619
	{0x1FA60, 0x1FA6D},	-- 129632..129645
	{0x1FA70, 0x1FA7C},	-- 129648..129660
	{0x1FA80, 0x1FA89},	-- 129664..129673
	{0x1FA8F, 0x1FAC6},	-- 129679..129734
	{0x1FACE, 0x1FADC},	-- 129742..129756
	{0x1FADF, 0x1FAE9},	-- 129759..129769
	{0x1FAF0, 0x1FAF8},	-- 129776..129784
	{0x1FB00, 0x1FB92},	-- 129792..129938
	{0x1FB94, 0x1FBF9},	-- 129940..130041
	{0xE0020, 0xE007F},	-- 917536..917631
	}


--[[--------------------------< S P E C I A L S _ T >---------------------------------------------------------

Table of individual language-specific codepoints that are neither Latn nor Zyyy but that are commonly used in
transliterations.  This list is manually curated so is most likely incomplete.

Top-level keys of <specials_t> are codepoint numbers, written here as hexadecimal literals; the trailing
comment on each key line gives the decimal value and the character name.  Each value is a set whose keys are
the tags (always lowercase) of the language transliterations that use that codepoint; every member is <true>.

]]

local specials_t = {
	[0x0314] = {	-- 788: COMBINING REVERSED COMMA ABOVE
		["hy"] = true,	-- Armenian
	},
	[0x031A] = {	-- 794: COMBINING LEFT ANGLE ABOVE
		["ltc"] = true,	-- Middle Chinese; is this really IPA?
	},
	[0x031B] = {	-- 795: COMBINING HORN
		["th"] = true,	-- Thai
	},
	[0x0326] = {	-- 806: COMBINING COMMA BELOW
		["ab"] = true,	-- Abkhaz
		["kca"] = true,	-- Khanty
		["xal"] = true,	-- Kalmyk or Oirat
	},
	[0x0327] = {	-- 807: COMBINING CEDILLA
		["fa"] = true,	-- Persian
	},
	[0x0329] = {	-- 809: COMBINING VERTICAL LINE BELOW
		["ab"] = true,	-- Abkhaz
		["sa"] = true,	-- Sanskrit
	},
	[0x032F] = {	-- 815: COMBINING INVERTED BREVE BELOW
		["mong"] = true,	-- Mongolian
		["xsc"] = true,	-- Scythian
	},
	[0x0332] = {	-- 818: COMBINING LOW LINE
		["ar"] = true,	-- Arabic
		["hbo"] = true,	-- Ancient Hebrew
		["he"] = true,	-- Hebrew
		["jpa"] = true,	-- Jewish Palestinian Aramaic
		["mdh"] = true,	-- Maguindanaon
		["otk"] = true,	-- Old Turkish
	},
	[0x033F] = {	-- 831: COMBINING DOUBLE OVERLINE
		["mnp"] = true,	-- Northern Min Chinese, Jian'ou dialect
	},
	[0x0357] = {	-- 855: COMBINING RIGHT HALF RING ABOVE
		["egy"] = true,	-- Ancient Egyptian
	},
	[0x035F] = {	-- 863: COMBINING DOUBLE MACRON BELOW
		["am"] = true,	-- Amharic
		["ar"] = true,	-- Arabic
		["dv"] = true,	-- Dhivehi, Divehi, or Maldivian
		["fa"] = true,	-- Persian
		["hi"] = true,	-- Hindi
		["inc"] = true,	-- Indic languages
		["ur"] = true,	-- Urdu
	},
	[0x0360] = {	-- 864: COMBINING DOUBLE TILDE
		["hi"] = true,	-- Hindi
	},
	[0x0361] = {	-- 865: COMBINING DOUBLE INVERTED BREVE
		["be"] = true,	-- Belarusian
		["ltc"] = true,	-- Middle Chinese; is this really IPA?
		["ru"] = true,	-- Russian
		["rue"] = true,	-- Rusyn
		["sem"] = true,	-- Semitic languages
		["sit"] = true,	-- Sino-Tibetan languages
		["tt"] = true,	-- Tatar
	},
	[0x0394] = {	-- 916: GREEK CAPITAL LETTER DELTA
		["xsc"] = true,	-- Scythian
		["xsc-x-pontic"] = true,	-- Pontic Scythian
	},
	[0x0398] = {	-- 920: GREEK CAPITAL LETTER THETA
		["ae"] = true,	-- Avestan
	},
	[0x03A6] = {	-- 934: GREEK CAPITAL LETTER PHI
		["xle"] = true,	-- Lemnian
	},
	[0x03B1] = {	-- 945: GREEK SMALL LETTER ALPHA
		["apc"] = true,	-- Levantine Arabic
	},
	[0x03B2] = {	-- 946: GREEK SMALL LETTER BETA
		["ae"] = true,	-- Avestan
		["gha"] = true,	-- Ghadamès
		["ougr"] = true,	-- Old Uyghur
		["sem"] = true,	-- Semitic languages
		["syc"] = true,	-- Classical Syriac
		["wuu"] = true,	-- Shanghainese variety of Wu Chinese
	},
	[0x03B3] = {	-- 947: GREEK SMALL LETTER GAMMA
		["ae"] = true,	-- Avestan
		["ltc"] = true,	-- Late Middle Chinese
		["mn"] = true,	-- Mongolian
		["och"] = true,	-- Old Chinese
		["ougr"] = true,	-- Old Uyghur
		["pal"] = true,	-- Middle Persian
		["syc"] = true,	-- Classical Syriac
		["syr"] = true,	-- Syriac
		["xal"] = true,	-- Kalmyk or Oirat
		["xng"] = true,	-- Middle Mongolian
		["xsc"] = true,	-- Scythian
		["ydg"] = true,	-- Yidgha
		["mong"] = true,	-- Mongolian
	},
	[0x03B4] = {	-- 948: GREEK SMALL LETTER DELTA
		["ae"] = true,	-- Avestan
		["ougr"] = true,	-- Old Uyghur
		["sog"] = true,	-- Sogdian
		["sogd"] = true,	-- Sogdian
		["syc"] = true,	-- Classical Syriac
		["xpr"] = true,	-- Parthian
		["xsc"] = true,	-- Scythian
		["xsc-x-pontic"] = true,	-- Pontic Scythian
	},
	[0x03B8] = {	-- 952: GREEK SMALL LETTER THETA
		["ae"] = true,	-- Avestan
		["ba"] = true,	-- Bashkir
		["cms"] = true,	-- Messapic
		["ett"] = true,	-- Etruscan
		["hur"] = true,	-- Halkomelem
		["ira"] = true,	-- Iranian languages
		["my"] = true,	-- Burmese
		["pal"] = true,	-- Middle Persian (Pahlavi)
		["peo"] = true,	-- Old Persian
		["sa"] = true,	-- Sanskrit
		["sem"] = true,	-- Semitic languages
		["syc"] = true,	-- Classical Syriac
		["syr"] = true,	-- Syriac
		["xme"] = true,	-- Median
		["xpg"] = true,	-- Phrygian
		["xpr"] = true,	-- Parthian
		["xsc"] = true,	-- Scythian
	},
	[0x03BB] = {	-- 955: GREEK SMALL LETTER LAMDA
		["xcr"] = true,	-- Carian
		["xld"] = true,	-- Lydian
	},
	[0x03C3] = {	-- 963: GREEK SMALL LETTER SIGMA
		["ett"] = true,	-- Etruscan
	},
	[0x03C4] = {	-- 964: GREEK SMALL LETTER TAU
		["xld"] = true,	-- Lydian
	},
	[0x03C6] = {	-- 966: GREEK SMALL LETTER PHI
		["ett"] = true,	-- Etruscan
	},
	[0x03C7] = {	-- 967: GREEK SMALL LETTER CHI
		["ett"] = true,	-- Etruscan
		["gem"] = true,	-- Germanic languages
		["hur"] = true,	-- Halkomelem
		["kbd"] = true,	-- Kabardian
		["ltc"] = true,	-- Late Middle Chinese
		["och"] = true,	-- Old Chinese
		["xlc"] = true,	-- Lycian
		["xle"] = true,	-- Lemnian
	},
	[0x03C8] = {	-- 968: GREEK SMALL LETTER PSI
		["ett"] = true,	-- Etruscan
	},
	[0x03D1] = {	-- 977: GREEK THETA SYMBOL (cursive form)
		["ae"] = true,	-- Avestan
		["ira"] = true,	-- Iranian languages
		["peo"] = true,	-- Old Persian
		["xme"] = true,	-- Median
		["xsc"] = true,	-- Scythian
		["xsc-x-pontic"] = true,	-- Pontic Scythian
	},
	[0x044A] = {	-- 1098: CYRILLIC SMALL LETTER HARD SIGN
		["ady"] = true,	-- Adyghe
		["cu"] = true,	-- Church Slavic
		["zls"] = true,	-- South Slavic languages
	},
	[0x044C] = {	-- 1100: CYRILLIC SMALL LETTER SOFT SIGN
		["az"] = true,	-- Azerbaijani
		["cu"] = true,	-- Church Slavonic
		["dng"] = true,	-- Dungan
		["ru"] = true,	-- Russian
		["tt"] = true,	-- Tatar
		["tyv"] = true,	-- Tuvinian
	},
	[0x04FE] = {	-- 1278: CYRILLIC CAPITAL LETTER HA WITH STROKE
		["av"] = true,	-- Avar
	},
	[0x04FF] = {	-- 1279: CYRILLIC SMALL LETTER HA WITH STROKE
		["av"] = true,	-- Avar
	},
	[0x1FB1] = {	-- 8113: GREEK SMALL LETTER ALPHA WITH MACRON
		["apc"] = true,	-- Levantine Arabic
	},
	[0x1FFE] = {	-- 8190: GREEK DASIA
		["ar"] = true,	-- Arabic (Ayin)
		["xcl"] = true,	-- Classical Armenian
	},
	[0x4E0A] = {	-- 19978: [CJK Unified Ideographs]
		["wuu"] = true,	-- Wu Chinese tone marker
	},
	[0x5165] = {	-- 20837: [CJK Unified Ideographs]
		["wuu"] = true,	-- Wu Chinese tone marker
	},
	[0x53BB] = {	-- 21435: [CJK Unified Ideographs]
		["wuu"] = true,	-- Wu Chinese tone marker
	},
	[0x5E73] = {	-- 24179: [CJK Unified Ideographs]
		["wuu"] = true,	-- Wu Chinese tone marker
	},
	[0x9633] = {	-- 38451: [CJK Unified Ideographs] (Yang)
		["wuu"] = true,	-- Suzhou dialect of Wu Chinese tone marker; see [[Suzhou dialect#Tones]]
	},
	[0x9634] = {	-- 38452: [CJK Unified Ideographs] (Yin)
		["wuu"] = true,	-- Suzhou dialect of Wu Chinese tone marker; see [[Suzhou dialect#Tones]]
	},
	[0xFE20] = {	-- 65056: COMBINING LIGATURE LEFT HALF
		["ru"] = true,	-- Russian
	},
	[0xFE21] = {	-- 65057: COMBINING LIGATURE RIGHT HALF
		["ru"] = true,	-- Russian
	},
	}


--[[--------------------------< E X P O R T S >---------------------------------------------------------------

The module's return value: the three data tables under their own names, plus <sizeof_ranges_t>, the number of
entries in <ranges_t> computed once here with the length operator.

NOTE(review): presumably <sizeof_ranges_t> exists so that consumers do not have to apply # to <ranges_t>
themselves (e.g. when the data is read through mw.loadData) -- confirm against the calling module.

]]

local exports_t = {
	singles_t = singles_t,
	ranges_t = ranges_t,
	specials_t = specials_t,

	sizeof_ranges_t = #ranges_t,												-- entry count of <ranges_t>
	}

return exports_t