Spaces:
Running
Running
| """ | |
| This file is adapted from https://github.com/hpbyte/Myanmar_Number_to_Words | |
| """ | |
| import re | |
# Spoken word for each digit character, looked up one character at a time by
# the conversion functions in this module.
# NOTE(review): the non-ASCII literals in this block look mis-decoded (this
# module converts Myanmar numbers, yet the characters are Thai-script). As
# rendered here several keys are indistinguishable, in which case later
# entries overwrite earlier ones. They are reproduced unchanged -- restore
# them from the original file before relying on this table.
mm_digit = {
    "แ": "แแฏแ",
    "แ": "แแ แบ",
    "แ": "แแพแ แบ",
    "แ": "แแฏแถ:",
    "แ": "แแฑ:",
    "แ ": "แแซ:",
    "แ": "แแผแฑแฌแแบ",
    "แ": "แแฏแแพแ แบ",
    "แ": "แแพแ แบ",
    "แ": "แแญแฏ:",
}

# Regular expressions used to classify a number string.
# Raw strings keep `\/` and `\.` as regex escapes; in a normal string literal
# they are invalid escape sequences and trigger a SyntaxWarning on newer
# Python versions. The resulting pattern text is unchanged.
rgxPh = r"^(แแ|แแ)"  # two-character prefix that marks a phone number
rgxDate = r"[แ-แ]{1,2}-[แ-แ]{1,2}-[แ-แ]{4}|[แ-แ]{1,2}\/[แ-แ]{1,2}\/[แ-แ]{4}"  # d-m-yyyy or d/m/yyyy
rgxTime = r"[แ-แ]{1,2}:[แ-แ]{1,2}"  # h:m
rgxDec = r"[แ-แ]*\.[แ-แ]*"  # decimal with a literal dot
rgxAmt = r"[,แ-แ]+"  # digits with optional comma separators
def convert_digit(num):
    """
    Convert a plain digit string into spoken words, one place value at a time.

    The last five digits are handled individually (fifth, fourth, third,
    second and last digit from the right); a place whose digit is the zero
    character contributes nothing. Any digits before the last five are
    converted through mm_num2word() and combined with a unit word.

    NOTE(review): the non-ASCII literals look mis-decoded (Thai-script
    characters in a Myanmar converter). They are reproduced unchanged;
    restore them from the original file.

    @type num str
    @param num Myanmar number (digits only, separators already removed)
    @rtype str
    @return converted Myanmar spoken words ("" for an empty string)
    @raise KeyError if a character of num is not a key of mm_digit
    """
    nb_digits = len(num)

    def is_nonzero(pos):
        # True when the digit `pos` places from the right is NOT the zero digit.
        return num[-pos] != "แ"

    def hundred_thousandth_val():
        # Everything in front of the last five digits, spoken as its own number.
        head = num[:-5]
        if head[-2:] == "แแ":
            # Unit word goes first when the leading part ends in two zeros.
            return "แแญแแบ: " + mm_num2word(head)
        return mm_num2word(head) + "แแญแแบ: "

    def thousandth_val():
        # The suffix differs depending on whether the last three digits are all zero.
        suffix = "แแฑแฌแแบ " if num[-3:] == "แแแ" else "แแฑแฌแแบแท "
        return mm_digit[num[-4]] + suffix

    def hundredth_val():
        tens, ones = num[-2], num[-1]
        # Alternate suffix when exactly one of the two trailing digits matches
        # the zero literal and the other matches the digit class.
        use_alt_suffix = (tens == "แ" and re.match(r"[แ-แ]", ones)) or (
            re.match(r"[แ-แ]", tens) and ones == "แ"
        )
        return mm_digit[num[-3]] + ("แแฌแท " if use_alt_suffix else "แแฌ ")

    def tenth_val():
        # One specific tens digit is left unspoken; only the unit word is emitted.
        prefix = "" if num[-2] == "แ" else mm_digit[num[-2]]
        suffix = "แแแบ " if num[-1] == "แ" else "แแแบแท "
        return prefix + suffix

    # Collect the pieces from the most significant place down, then join once.
    parts = []
    if nb_digits > 5:
        parts.append(hundred_thousandth_val())
    if nb_digits > 4 and is_nonzero(5):
        parts.append(mm_digit[num[-5]] + "แแฑแฌแแบ: ")
    if nb_digits > 3 and is_nonzero(4):
        parts.append(thousandth_val())
    if nb_digits > 2 and is_nonzero(3):
        parts.append(hundredth_val())
    if nb_digits > 1 and is_nonzero(2):
        parts.append(tenth_val())
    if nb_digits > 0 and is_nonzero(1):
        parts.append(mm_digit[num[-1]])
    return "".join(parts)
def mm_num2word(num):
    """
    Detect the type of number and convert it to spoken words accordingly.

    The checks run in this order and the first match wins: phone number
    (decided by the first two characters), date, time, decimal, amount.
    Each pattern is anchored at the start of num only (re.match), not at
    the end.

    NOTE(review): the non-ASCII literals look mis-decoded (Thai-script
    characters in a Myanmar converter). They are reproduced unchanged;
    restore them from the original file.

    @type num str
    @param num Myanmar number
    @rtype str
    @return converted Myanmar spoken words
    @raise ValueError if num matches none of the supported formats
           (ValueError is an Exception subclass, so callers that catch
           Exception keep working)
    """
    # phone number: spelled out digit by digit, with a dedicated word for zero
    if re.match(rgxPh, num[:2]):
        return " ".join("แแฝแแบ" if d == "แ" else mm_digit[d] for d in num)

    # date: fields are spoken as last field, middle field, first field
    if re.match(rgxDate, num):
        fields = re.split(r"-|/", num)
        return (
            convert_digit(fields[-1])
            + " แแฏแแพแ แบ "
            + convert_digit(fields[1])
            + " แแแญแฏแแบ: "
            + convert_digit(fields[0])
            + " แแแบ"
        )

    # time: one specific minute value has its own single word
    if re.match(rgxTime, num):
        fields = re.split(r":", num)
        if fields[1] == "แแ":
            minutes = "แแฝแฒ"
        else:
            minutes = convert_digit(fields[1]) + " แแญแแ แบ"
        return convert_digit(fields[0]) + " แแฌแแฎ " + minutes

    # decimal: integer part as a number, fractional part digit by digit
    if re.match(rgxDec, num):
        fields = re.split(r"\.", num)
        fraction = " ".join(mm_digit[d] for d in fields[1])
        return convert_digit(fields[0]) + " แแฟแ " + fraction

    # amount: drop the thousands separators and convert as a plain number
    if re.match(rgxAmt, num):
        return convert_digit(num.replace(",", ""))

    # default
    raise ValueError("Cannot convert the provided number format!")
def extract_num(S):
    """
    Extract numbers from the input string.

    The date, time, decimal and amount patterns are combined into a single
    alternation, in that order, so at any position the earlier pattern is
    tried first.

    @type S str
    @param S Myanmar sentence
    @rtype list
    @return a list of Myanmar numbers (the matched substrings, in order of
            appearance)
    """
    # None of the combined patterns contains a capture group, so findall()
    # yields the whole matched substrings.
    combined = "|".join((rgxDate, rgxTime, rgxDec, rgxAmt))
    return re.findall(combined, S)