To: vim_dev@googlegroups.com Subject: Patch 8.2.3068 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.3068 Problem: Unicode tables are slightly outdated. Solution: Update the tables for Unicode release 13. (Christian Brabandt closes #8430) Files: runtime/tools/unicode.vim, src/mbyte.c *** ../vim-8.2.3067/runtime/tools/unicode.vim 2018-07-14 19:27:57.000000000 +0200 --- runtime/tools/unicode.vim 2021-06-27 21:26:27.129681656 +0200 *************** *** 7,13 **** " Usage: Vim -S " " Author: Bram Moolenaar ! " Last Update: 2010 Jan 12 " Parse lines of UnicodeData.txt. Creates a list of lists in s:dataprops. func! ParseDataToProps() --- 7,13 ---- " Usage: Vim -S " " Author: Bram Moolenaar ! " Last Update: 2020 Aug 24 " Parse lines of UnicodeData.txt. Creates a list of lists in s:dataprops. func! ParseDataToProps() *************** *** 195,200 **** --- 195,207 ---- let end = -1 let ranges = [] let dataidx = 0 + " Account for indentation differences between ambiguous and doublewidth + " table in mbyte.c + if a:pattern == 'A' + let spc = ' ' + else + let spc = "\t" + endif for p in s:widthprops if p[1][0] =~ a:pattern if p[0] =~ '\.\.' *************** *** 229,235 **** else if start >= 0 " produce previous range ! call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) if a:pattern == 'A' call add(s:ambitable, [start, end]) else --- 236,242 ---- else if start >= 0 " produce previous range ! call add(ranges, printf("%s{0x%04x, 0x%04x},", spc, start, end)) if a:pattern == 'A' call add(s:ambitable, [start, end]) else *************** *** 243,249 **** endif endfor if start >= 0 ! call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end)) if a:pattern == 'A' call add(s:ambitable, [start, end]) else --- 250,256 ---- endif endfor if start >= 0 ! call add(ranges, printf("%s{0x%04x, 0x%04x},", spc, start, end)) if a:pattern == 'A' call add(s:ambitable, [start, end]) else *************** *** 254,346 **** " New buffer to put the result in. new exe "file " . a:tableName ! call setline(1, " static struct interval " . a:tableName . "[] =") ! call setline(2, " {") call append('$', ranges) call setline('$', getline('$')[:-2]) " remove last comma ! call setline(line('$') + 1, " };") wincmd p endfunc ! " Build the amoji width table in a new buffer. ! func! BuildEmojiTable(pattern, tableName) ! let alltokens = [] ! let widthtokens = [] ! let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")') ! for n in range(len(lines)) ! let line = lines[n] ! let token = split(line, '\.\.') ! let first = ('0x' . token[0]) + 0 ! if len(token) == 1 let last = first else ! let last = ('0x' . token[1]) + 0 endif ! let token = [first, last] ! if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1]) ! let alltokens[-1][1] = token[1] else ! call add(alltokens, token) endif ! " Characters below 1F000 may be considered single width traditionally, ! " making them double width causes problems. ! if first < 0x1f000 ! continue ! endif - " exclude characters that are in the "ambiguous" or "doublewidth" table - for ambi in s:ambitable - if first >= ambi[0] && first <= ambi[1] - let first = ambi[1] + 1 - endif - if last >= ambi[0] && last <= ambi[1] - let last = ambi[0] - 1 - endif - endfor - for double in s:doubletable - if first >= double[0] && first <= double[1] - let first = double[1] + 1 - endif - if last >= double[0] && last <= double[1] - let last = double[0] - 1 - endif - endfor ! if first <= last ! let token = [first, last] ! if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1]) ! let widthtokens[-1][1] = token[1] ! else ! call add(widthtokens, token) ! endif ! endif ! endfor ! let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') ! let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') " New buffer to put the result in. new ! exe "file " . a:tableName . '_all' ! call setline(1, " static struct interval " . a:tableName . "_all[] =") ! call setline(2, " {") call append('$', allranges) call setline('$', getline('$')[:-2]) " remove last comma ! call setline(line('$') + 1, " };") wincmd p " New buffer to put the result in. new ! exe "file " . a:tableName . '_width' ! call setline(1, " static struct interval " . a:tableName . "_width[] =") call setline(2, " {") ! call append('$', widthranges) call setline('$', getline('$')[:-2]) " remove last comma call setline(line('$') + 1, " };") wincmd p endfunc " Try to avoid hitting E36 set equalalways --- 261,425 ---- " New buffer to put the result in. new exe "file " . a:tableName ! if a:pattern == 'A' ! call setline(1, "static struct interval " . a:tableName . "[] =") ! call setline(2, "{") ! else ! call setline(1, " static struct interval " . a:tableName . "[] =") ! call setline(2, " {") ! endif call append('$', ranges) call setline('$', getline('$')[:-2]) " remove last comma ! if a:pattern == 'A' ! call setline(line('$') + 1, "};") ! else ! call setline(line('$') + 1, " };") ! endif wincmd p endfunc ! ! " Get characters from a list of lines in form "12ab .." or "12ab..56cd ..." ! " and put them in dictionary "chardict" ! func AddLinesToCharDict(lines, chardict) ! for line in a:lines ! let tokens = split(line, '\.\.') ! let first = str2nr(tokens[0], 16) ! if len(tokens) == 1 let last = first else ! let last = str2nr(tokens[1], 16) endif + for nr in range(first, last) + let a:chardict[nr] = 1 + endfor + endfor + endfunc ! func Test_AddLinesToCharDict() ! let dict = {} ! call AddLinesToCharDict([ ! \ '1234 blah blah', ! \ '1235 blah blah', ! \ '12a0..12a2 blah blah', ! \ '12a1 blah blah', ! \ ], dict) ! call assert_equal({0x1234: 1, 0x1235: 1, ! \ 0x12a0: 1, 0x12a1: 1, 0x12a2: 1, ! \ }, dict) ! if v:errors != [] ! echoerr 'AddLinesToCharDict' v:errors ! return 1 ! endif ! return 0 ! endfunc ! ! ! func CharDictToPairList(chardict) ! let result = [] ! let keys = keys(a:chardict)->map('str2nr(v:val)')->sort('N') ! let low = keys[0] ! let high = keys[0] ! for key in keys ! if key > high + 1 ! call add(result, [low, high]) ! let low = key ! let high = key else ! let high = key endif + endfor + call add(result, [low, high]) + return result + endfunc ! func Test_CharDictToPairList() ! let dict = {0x1020: 1, 0x1021: 1, 0x1022: 1, ! \ 0x1024: 1, ! \ 0x2022: 1, ! \ 0x2024: 1, 0x2025: 1} ! call assert_equal([ ! \ [0x1020, 0x1022], ! \ [0x1024, 0x1024], ! \ [0x2022, 0x2022], ! \ [0x2024, 0x2025], ! \ ], CharDictToPairList(dict)) ! if v:errors != [] ! echoerr 'CharDictToPairList' v:errors ! return 1 ! endif ! return 0 ! endfunc ! " Build the amoji width table in a new buffer. ! func BuildEmojiTable() ! " First make the table for all emojis. ! let pattern = '; Emoji\s\+#\s' ! let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~pattern'), 'matchstr(v:val,"^\\S\\+")') ! ! " Make a dictionary with an entry for each character. ! let chardict = {} ! call AddLinesToCharDict(lines, chardict) ! let pairlist = CharDictToPairList(chardict) ! let allranges = map(pairlist, 'printf(" {0x%04x, 0x%04x},", v:val[0], v:val[1])') " New buffer to put the result in. new ! exe 'file emoji_all' ! call setline(1, "static struct interval emoji_all[] =") ! call setline(2, "{") call append('$', allranges) call setline('$', getline('$')[:-2]) " remove last comma ! call setline(line('$') + 1, "};") wincmd p + " Make the table for wide emojis. + let pattern = '; Emoji_\(Presentation\|Modifier_Base\)\s\+#\s' + let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~pattern'), 'matchstr(v:val,"^\\S\\+")') + + " Make a dictionary with an entry for each character. + let chardict = {} + call AddLinesToCharDict(lines, chardict) + + " exclude characters that are in the "ambiguous" or "doublewidth" table + for ambi in s:ambitable + for nr in range(ambi[0], ambi[1]) + if has_key(chardict, nr) + call remove(chardict, nr) + endif + endfor + endfor + + for wide in s:doubletable + for nr in range(wide[0], wide[1]) + if has_key(chardict, nr) + call remove(chardict, nr) + endif + endfor + endfor + + let pairlist = CharDictToPairList(chardict) + let wide_ranges = map(pairlist, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])') + " New buffer to put the result in. new ! exe 'file emoji_wide' ! call setline(1, " static struct interval emoji_wide[] =") call setline(2, " {") ! call append('$', wide_ranges) call setline('$', getline('$')[:-2]) " remove last comma call setline(line('$') + 1, " };") wincmd p endfunc + " First test a few things + let v:errors = [] + if Test_AddLinesToCharDict() || Test_CharDictToPairList() + finish + endif + + " Try to avoid hitting E36 set equalalways *************** *** 383,391 **** call BuildWidthTable('A', 'ambiguous') " Edit the emoji text file. Requires the netrw plugin. ! edit https://www.unicode.org/Public/emoji/11.0/emoji-data.txt ! "edit http://www.unicode.org/Public/emoji/latest/emoji-data.txt " Build the emoji table. Ver. 1.0 - 6.0 ! " Must come after the "ambiguous" table ! call BuildEmojiTable('; Emoji\s\+#\s\+\d\+\.\d', 'emoji') --- 462,469 ---- call BuildWidthTable('A', 'ambiguous') " Edit the emoji text file. Requires the netrw plugin. ! edit https://unicode.org/Public/emoji/12.1/emoji-data.txt " Build the emoji table. Ver. 1.0 - 6.0 ! " Must come after the "ambiguous" and "doublewidth" tables ! call BuildEmojiTable() *** ../vim-8.2.3067/src/mbyte.c 2021-06-13 20:27:32.889357660 +0200 --- src/mbyte.c 2021-06-27 21:26:27.129681656 +0200 *************** *** 1389,1395 **** {0x26ce, 0x26ce}, {0x26d4, 0x26d4}, {0x26ea, 0x26ea}, ! {0x26f2, 0x26f5}, {0x26fa, 0x26fa}, {0x26fd, 0x26fd}, {0x2705, 0x2705}, --- 1389,1396 ---- {0x26ce, 0x26ce}, {0x26d4, 0x26d4}, {0x26ea, 0x26ea}, ! {0x26f2, 0x26f3}, ! {0x26f5, 0x26f5}, {0x26fa, 0x26fa}, {0x26fd, 0x26fd}, {0x2705, 0x2705}, *************** *** 1414,1421 **** {0x3099, 0x30ff}, {0x3105, 0x312f}, {0x3131, 0x318e}, ! {0x3190, 0x31ba}, ! {0x31c0, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x3247}, {0x3250, 0x4dbf}, --- 1415,1421 ---- {0x3099, 0x30ff}, {0x3105, 0x312f}, {0x3131, 0x318e}, ! {0x3190, 0x31e3}, {0x31f0, 0x321e}, {0x3220, 0x3247}, {0x3250, 0x4dbf}, *************** *** 1431,1438 **** {0xff01, 0xff60}, {0xffe0, 0xffe6}, {0x16fe0, 0x16fe3}, {0x17000, 0x187f7}, ! {0x18800, 0x18af2}, {0x1b000, 0x1b11e}, {0x1b150, 0x1b152}, {0x1b164, 0x1b167}, --- 1431,1440 ---- {0xff01, 0xff60}, {0xffe0, 0xffe6}, {0x16fe0, 0x16fe3}, + {0x16ff0, 0x16ff1}, {0x17000, 0x187f7}, ! {0x18800, 0x18cd5}, ! {0x18d00, 0x18d08}, {0x1b000, 0x1b11e}, {0x1b150, 0x1b152}, {0x1b164, 0x1b167}, *************** *** 1467,1486 **** {0x1f680, 0x1f6c5}, {0x1f6cc, 0x1f6cc}, {0x1f6d0, 0x1f6d2}, ! {0x1f6d5, 0x1f6d5}, {0x1f6eb, 0x1f6ec}, ! {0x1f6f4, 0x1f6fa}, {0x1f7e0, 0x1f7eb}, ! {0x1f90d, 0x1f971}, ! {0x1f973, 0x1f976}, ! {0x1f97a, 0x1f9a2}, ! {0x1f9a5, 0x1f9aa}, ! {0x1f9ae, 0x1f9ca}, {0x1f9cd, 0x1f9ff}, ! {0x1fa70, 0x1fa73}, {0x1fa78, 0x1fa7a}, ! {0x1fa80, 0x1fa82}, ! {0x1fa90, 0x1fa95}, {0x20000, 0x2fffd}, {0x30000, 0x3fffd} }; --- 1469,1490 ---- {0x1f680, 0x1f6c5}, {0x1f6cc, 0x1f6cc}, {0x1f6d0, 0x1f6d2}, ! {0x1f6d5, 0x1f6d7}, {0x1f6eb, 0x1f6ec}, ! {0x1f6f4, 0x1f6fc}, {0x1f7e0, 0x1f7eb}, ! {0x1f90c, 0x1f93a}, ! {0x1f93c, 0x1f945}, ! {0x1f947, 0x1f978}, ! {0x1f97a, 0x1f9cb}, {0x1f9cd, 0x1f9ff}, ! {0x1fa70, 0x1fa74}, {0x1fa78, 0x1fa7a}, ! {0x1fa80, 0x1fa86}, ! {0x1fa90, 0x1faa8}, ! {0x1fab0, 0x1fab6}, ! {0x1fac0, 0x1fac2}, ! {0x1fad0, 0x1fad6}, {0x20000, 0x2fffd}, {0x30000, 0x3fffd} }; *************** *** 2350,2356 **** {0x0b3e, 0x0b44}, {0x0b47, 0x0b48}, {0x0b4b, 0x0b4d}, ! {0x0b56, 0x0b57}, {0x0b62, 0x0b63}, {0x0b82, 0x0b82}, {0x0bbe, 0x0bc2}, --- 2354,2360 ---- {0x0b3e, 0x0b44}, {0x0b47, 0x0b48}, {0x0b4b, 0x0b4d}, ! {0x0b55, 0x0b57}, {0x0b62, 0x0b63}, {0x0b82, 0x0b82}, {0x0bbe, 0x0bc2}, *************** *** 2377,2383 **** {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d62, 0x0d63}, ! {0x0d82, 0x0d83}, {0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, --- 2381,2387 ---- {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d62, 0x0d63}, ! {0x0d81, 0x0d83}, {0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, *************** *** 2424,2430 **** {0x1a55, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a7f}, ! {0x1ab0, 0x1abe}, {0x1b00, 0x1b04}, {0x1b34, 0x1b44}, {0x1b6b, 0x1b73}, --- 2428,2434 ---- {0x1a55, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a7f}, ! {0x1ab0, 0x1ac0}, {0x1b00, 0x1b04}, {0x1b34, 0x1b44}, {0x1b6b, 0x1b73}, *************** *** 2453,2458 **** --- 2457,2463 ---- {0xa806, 0xa806}, {0xa80b, 0xa80b}, {0xa823, 0xa827}, + {0xa82c, 0xa82c}, {0xa880, 0xa881}, {0xa8b4, 0xa8c5}, {0xa8e0, 0xa8f1}, *************** *** 2488,2493 **** --- 2493,2499 ---- {0x10a3f, 0x10a3f}, {0x10ae5, 0x10ae6}, {0x10d24, 0x10d27}, + {0x10eab, 0x10eac}, {0x10f46, 0x10f50}, {0x11000, 0x11002}, {0x11038, 0x11046}, *************** *** 2500,2505 **** --- 2506,2512 ---- {0x11180, 0x11182}, {0x111b3, 0x111c0}, {0x111c9, 0x111cc}, + {0x111ce, 0x111cf}, {0x1122c, 0x11237}, {0x1123e, 0x1123e}, {0x112df, 0x112ea}, *************** *** 2522,2527 **** --- 2529,2539 ---- {0x116ab, 0x116b7}, {0x1171d, 0x1172b}, {0x1182c, 0x1183a}, + {0x11930, 0x11935}, + {0x11937, 0x11938}, + {0x1193b, 0x1193e}, + {0x11940, 0x11940}, + {0x11942, 0x11943}, {0x119d1, 0x119d7}, {0x119da, 0x119e0}, {0x119e4, 0x119e4}, *************** *** 2549,2554 **** --- 2561,2568 ---- {0x16f4f, 0x16f4f}, {0x16f51, 0x16f87}, {0x16f8f, 0x16f92}, + {0x16fe4, 0x16fe4}, + {0x16ff0, 0x16ff1}, {0x1bc9d, 0x1bc9e}, {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, *************** *** 2650,2655 **** --- 2664,2670 ---- {0x2699, 0x2699}, {0x269b, 0x269c}, {0x26a0, 0x26a1}, + {0x26a7, 0x26a7}, {0x26aa, 0x26ab}, {0x26b0, 0x26b1}, {0x26bd, 0x26be}, *************** *** 2695,2701 **** {0x3299, 0x3299}, {0x1f004, 0x1f004}, {0x1f0cf, 0x1f0cf}, ! {0x1f170, 0x1f189}, {0x1f18e, 0x1f18e}, {0x1f191, 0x1f19a}, {0x1f1e6, 0x1f1ff}, --- 2710,2717 ---- {0x3299, 0x3299}, {0x1f004, 0x1f004}, {0x1f0cf, 0x1f0cf}, ! {0x1f170, 0x1f171}, ! {0x1f17e, 0x1f17f}, {0x1f18e, 0x1f18e}, {0x1f191, 0x1f19a}, {0x1f1e6, 0x1f1ff}, *************** *** 2735,2755 **** {0x1f5fa, 0x1f64f}, {0x1f680, 0x1f6c5}, {0x1f6cb, 0x1f6d2}, {0x1f6e0, 0x1f6e5}, {0x1f6e9, 0x1f6e9}, {0x1f6eb, 0x1f6ec}, {0x1f6f0, 0x1f6f0}, ! {0x1f6f3, 0x1f6f9}, ! {0x1f910, 0x1f93a}, ! {0x1f93c, 0x1f93e}, ! {0x1f940, 0x1f945}, ! {0x1f947, 0x1f970}, ! {0x1f973, 0x1f976}, ! {0x1f97a, 0x1f97a}, ! {0x1f97c, 0x1f9a2}, ! {0x1f9b0, 0x1f9b9}, ! {0x1f9c0, 0x1f9c2}, ! {0x1f9d0, 0x1f9ff} }; /* --- 2751,2775 ---- {0x1f5fa, 0x1f64f}, {0x1f680, 0x1f6c5}, {0x1f6cb, 0x1f6d2}, + {0x1f6d5, 0x1f6d7}, {0x1f6e0, 0x1f6e5}, {0x1f6e9, 0x1f6e9}, {0x1f6eb, 0x1f6ec}, {0x1f6f0, 0x1f6f0}, ! {0x1f6f3, 0x1f6fc}, ! {0x1f7e0, 0x1f7eb}, ! {0x1f90c, 0x1f93a}, ! {0x1f93c, 0x1f945}, ! {0x1f947, 0x1f978}, ! {0x1f97a, 0x1f9cb}, ! {0x1f9cd, 0x1f9ff}, ! {0x1fa70, 0x1fa74}, ! {0x1fa78, 0x1fa7a}, ! {0x1fa80, 0x1fa86}, ! {0x1fa90, 0x1faa8}, ! {0x1fab0, 0x1fab6}, ! {0x1fac0, 0x1fac2}, ! {0x1fad0, 0x1fad6} }; /* *************** *** 3097,3102 **** --- 3117,3124 ---- {0xa7c4,0xa7c4,-1,-48}, {0xa7c5,0xa7c5,-1,-42307}, {0xa7c6,0xa7c6,-1,-35384}, + {0xa7c7,0xa7c9,2,1}, + {0xa7f5,0xa7f5,-1,1}, {0xab70,0xabbf,1,-38864}, {0xff21,0xff3a,1,32}, {0x10400,0x10427,1,40}, *************** *** 3321,3326 **** --- 3343,3350 ---- {0xa7c4,0xa7c4,-1,-48}, {0xa7c5,0xa7c5,-1,-42307}, {0xa7c6,0xa7c6,-1,-35384}, + {0xa7c7,0xa7c9,2,1}, + {0xa7f5,0xa7f5,-1,1}, {0xff21,0xff3a,1,32}, {0x10400,0x10427,1,40}, {0x104b0,0x104d3,1,40}, *************** *** 3509,3515 **** {0xa794,0xa794,-1,48}, {0xa797,0xa7a9,2,-1}, {0xa7b5,0xa7bf,2,-1}, ! {0xa7c3,0xa7c3,-1,-1}, {0xab53,0xab53,-1,-928}, {0xab70,0xabbf,1,-38864}, {0xff41,0xff5a,1,-32}, --- 3533,3540 ---- {0xa794,0xa794,-1,48}, {0xa797,0xa7a9,2,-1}, {0xa7b5,0xa7bf,2,-1}, ! {0xa7c3,0xa7c8,5,-1}, ! {0xa7ca,0xa7f6,44,-1}, {0xab53,0xab53,-1,-928}, {0xab70,0xabbf,1,-38864}, {0xff41,0xff5a,1,-32}, *** ../vim-8.2.3067/src/version.c 2021-06-27 19:02:48.985931140 +0200 --- src/version.c 2021-06-27 21:27:44.421556106 +0200 *************** *** 757,758 **** --- 757,760 ---- { /* Add new patch number below this line */ + /**/ + 3068, /**/ -- Don't read everything you believe. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// \\\ \\\ sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///