MUSKAN17 committed on
Commit 33a85b2 · verified · 1 Parent(s): 3e2d36c
Files changed (1)
  1. app.py +151 -151
app.py CHANGED
@@ -1,165 +1,165 @@
- import gradio as gr
- from nltk import pos_tag, ne_chunk
- from nltk.tokenize import word_tokenize, sent_tokenize
- from nltk.tokenize.treebank import TreebankWordDetokenizer
- from nltk.corpus import wordnet, brown
- from spellchecker import SpellChecker
- import re
- import nltk

- # Preload resources
- detokenizer = TreebankWordDetokenizer()
- global_freq_dist = nltk.FreqDist(w.lower() for w in brown.words())

- def extract_proper_nouns(text):
-     """
-     Extracts proper nouns such as PERSON, ORGANIZATION, and GPE from the given text.
-     Returns a set of detected proper nouns in lowercase.
-     """
-     tokens = word_tokenize(text)
-     tagged = pos_tag(tokens)
-     chunks = ne_chunk(tagged)
-     proper_nouns = set()

-     for chunk in chunks:
-         if hasattr(chunk, 'label') and chunk.label() in ('PERSON', 'ORGANIZATION', 'GPE'):
-             entity = " ".join(c[0] for c in chunk)
-             proper_nouns.add(entity.lower())
-     return proper_nouns

- def protect_proper_nouns(text, dynamic_entities):
-     """
-     Identifies and marks proper nouns to prevent them from being altered.
-     Returns a list of tuples (word, is_protected).
-     """
-     tokens = word_tokenize(text)
-     protected = []
-     for token in tokens:
-         lower_token = token.lower()
-         if any(lower_token in entity for entity in dynamic_entities):
-             protected.append((token, True))
-         else:
-             protected.append((token, False))
-     return protected

- def replace_words_with_synonyms(tokens_with_protection, pos_tags):
-     """
-     Replaces words with synonyms while maintaining readability and ensuring proper nouns remain unchanged.
-     """
-     new_tokens = []
-     for (token, protected), (_, tag) in zip(tokens_with_protection, pos_tags):
-         if protected or not token.isalpha():
-             new_tokens.append(token)
-             continue

-         pos = None
-         if tag.startswith('JJ'):
-             pos = wordnet.ADJ
-         elif tag.startswith('RB'):
-             pos = wordnet.ADV
-         elif tag.startswith('NN'):
-             pos = wordnet.NOUN
-         elif tag.startswith('VB'):
-             pos = wordnet.VERB

-         if pos:
-             candidates = []
-             for syn in wordnet.synsets(token, pos=pos):
-                 for lemma in syn.lemmas():
-                     candidate = lemma.name().replace('_', ' ')
-                     if candidate.lower() == token.lower() or ' ' in candidate:
-                         continue
-                     if len(candidate) > len(token) * 1.2:
-                         continue
-                     candidates.append(candidate)
-             if candidates:
-                 best_candidate = max(candidates,
-                                      key=lambda x: global_freq_dist[x.lower()],
-                                      default=token)
-                 new_tokens.append(best_candidate)
-             else:
-                 new_tokens.append(token)
-         else:
-             new_tokens.append(token)
-     return new_tokens

- def restructure_sentences(text):
-     """
-     Splits long sentences for better readability.
-     Uses punctuation-based splitting if necessary.
-     """
-     sentences = sent_tokenize(text)
-     restructured = []
-     for sent in sentences:
-         if len(sent.split()) > 25:
-             parts = re.split(r'[,;]', sent)
-             if len(parts) > 1:
-                 sent = parts[0] + '. ' + ' '.join(parts[1:])
-         restructured.append(sent)
-     return ' '.join(restructured)

- def contextual_spell_check(text):
-     """
-     Performs spell checking while ensuring proper nouns remain unchanged.
-     """
-     protected = protect_proper_nouns(text, extract_proper_nouns(text))
-     spell = SpellChecker()
-     corrected = []
-     for token, protected_flag in protected:
-         if protected_flag or not token.isalpha():
-             corrected.append(token)
-             continue
-         correction = spell.correction(token)
-         if correction:
-             if token[0].isupper():
-                 corrected.append(correction.capitalize())
-             else:
-                 corrected.append(correction.lower())
-         else:
-             corrected.append(token)
-     return detokenizer.detokenize(corrected)

- def finalize_formatting(text):
-     """
-     Cleans up text formatting including spaces before punctuation, quote styles, and em dashes.
-     """
-     text = re.sub(r'\s+([.,!?;:])', r'\1', text)
-     text = re.sub(r'([(])\s+', r'\1', text)
-     text = re.sub(r'\s+([)])', r'\1', text)
-     text = re.sub(r'\"(.*?)\"', r'“\1”', text)
-     text = re.sub(r' -- ', r' — ', text)
-     return text.strip()

- def humanize_text(text):
-     """
-     Applies synonym replacement, sentence restructuring, and other enhancements to make text more natural.
-     """
-     dynamic_entities = extract_proper_nouns(text)
-     protected_tokens = protect_proper_nouns(text, dynamic_entities)
-     tokens = [t[0] for t in protected_tokens]
-     tags = pos_tag(tokens)
-     new_tokens = replace_words_with_synonyms(protected_tokens, tags)
-     text = detokenizer.detokenize(new_tokens)
-     return restructure_sentences(text)

- def process_text(input_text):
-     """
-     Processes input text to enhance readability by applying all transformations sequentially.
-     """
-     if input_text:
-         humanized = humanize_text(input_text)
-         spell_checked = contextual_spell_check(humanized)
-         final_output = finalize_formatting(spell_checked)
-         return final_output
-     return ""

- # Gradio Interface
- iface = gr.Interface(
-     fn=process_text,
-     inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
-     outputs=gr.Textbox(lines=5),
-     title="AI to Humanized Text Converter",
-     description="Enter text and get a more humanized, readable output.",
- )

- iface.launch()
 
+ # import gradio as gr
+ # from nltk import pos_tag, ne_chunk
+ # from nltk.tokenize import word_tokenize, sent_tokenize
+ # from nltk.tokenize.treebank import TreebankWordDetokenizer
+ # from nltk.corpus import wordnet, brown
+ # from spellchecker import SpellChecker
+ # import re
+ # import nltk

+ # # Preload resources
+ # detokenizer = TreebankWordDetokenizer()
+ # global_freq_dist = nltk.FreqDist(w.lower() for w in brown.words())

+ # def extract_proper_nouns(text):
+ #     """
+ #     Extracts proper nouns such as PERSON, ORGANIZATION, and GPE from the given text.
+ #     Returns a set of detected proper nouns in lowercase.
+ #     """
+ #     tokens = word_tokenize(text)
+ #     tagged = pos_tag(tokens)
+ #     chunks = ne_chunk(tagged)
+ #     proper_nouns = set()

+ #     for chunk in chunks:
+ #         if hasattr(chunk, 'label') and chunk.label() in ('PERSON', 'ORGANIZATION', 'GPE'):
+ #             entity = " ".join(c[0] for c in chunk)
+ #             proper_nouns.add(entity.lower())
+ #     return proper_nouns

+ # def protect_proper_nouns(text, dynamic_entities):
+ #     """
+ #     Identifies and marks proper nouns to prevent them from being altered.
+ #     Returns a list of tuples (word, is_protected).
+ #     """
+ #     tokens = word_tokenize(text)
+ #     protected = []
+ #     for token in tokens:
+ #         lower_token = token.lower()
+ #         if any(lower_token in entity for entity in dynamic_entities):
+ #             protected.append((token, True))
+ #         else:
+ #             protected.append((token, False))
+ #     return protected

+ # def replace_words_with_synonyms(tokens_with_protection, pos_tags):
+ #     """
+ #     Replaces words with synonyms while maintaining readability and ensuring proper nouns remain unchanged.
+ #     """
+ #     new_tokens = []
+ #     for (token, protected), (_, tag) in zip(tokens_with_protection, pos_tags):
+ #         if protected or not token.isalpha():
+ #             new_tokens.append(token)
+ #             continue

+ #         pos = None
+ #         if tag.startswith('JJ'):
+ #             pos = wordnet.ADJ
+ #         elif tag.startswith('RB'):
+ #             pos = wordnet.ADV
+ #         elif tag.startswith('NN'):
+ #             pos = wordnet.NOUN
+ #         elif tag.startswith('VB'):
+ #             pos = wordnet.VERB

+ #         if pos:
+ #             candidates = []
+ #             for syn in wordnet.synsets(token, pos=pos):
+ #                 for lemma in syn.lemmas():
+ #                     candidate = lemma.name().replace('_', ' ')
+ #                     if candidate.lower() == token.lower() or ' ' in candidate:
+ #                         continue
+ #                     if len(candidate) > len(token) * 1.2:
+ #                         continue
+ #                     candidates.append(candidate)
+ #             if candidates:
+ #                 best_candidate = max(candidates,
+ #                                      key=lambda x: global_freq_dist[x.lower()],
+ #                                      default=token)
+ #                 new_tokens.append(best_candidate)
+ #             else:
+ #                 new_tokens.append(token)
+ #         else:
+ #             new_tokens.append(token)
+ #     return new_tokens

+ # def restructure_sentences(text):
+ #     """
+ #     Splits long sentences for better readability.
+ #     Uses punctuation-based splitting if necessary.
+ #     """
+ #     sentences = sent_tokenize(text)
+ #     restructured = []
+ #     for sent in sentences:
+ #         if len(sent.split()) > 25:
+ #             parts = re.split(r'[,;]', sent)
+ #             if len(parts) > 1:
+ #                 sent = parts[0] + '. ' + ' '.join(parts[1:])
+ #         restructured.append(sent)
+ #     return ' '.join(restructured)

+ # def contextual_spell_check(text):
+ #     """
+ #     Performs spell checking while ensuring proper nouns remain unchanged.
+ #     """
+ #     protected = protect_proper_nouns(text, extract_proper_nouns(text))
+ #     spell = SpellChecker()
+ #     corrected = []
+ #     for token, protected_flag in protected:
+ #         if protected_flag or not token.isalpha():
+ #             corrected.append(token)
+ #             continue
+ #         correction = spell.correction(token)
+ #         if correction:
+ #             if token[0].isupper():
+ #                 corrected.append(correction.capitalize())
+ #             else:
+ #                 corrected.append(correction.lower())
+ #         else:
+ #             corrected.append(token)
+ #     return detokenizer.detokenize(corrected)

+ # def finalize_formatting(text):
+ #     """
+ #     Cleans up text formatting including spaces before punctuation, quote styles, and em dashes.
+ #     """
+ #     text = re.sub(r'\s+([.,!?;:])', r'\1', text)
+ #     text = re.sub(r'([(])\s+', r'\1', text)
+ #     text = re.sub(r'\s+([)])', r'\1', text)
+ #     text = re.sub(r'\"(.*?)\"', r'“\1”', text)
+ #     text = re.sub(r' -- ', r' — ', text)
+ #     return text.strip()

+ # def humanize_text(text):
+ #     """
+ #     Applies synonym replacement, sentence restructuring, and other enhancements to make text more natural.
+ #     """
+ #     dynamic_entities = extract_proper_nouns(text)
+ #     protected_tokens = protect_proper_nouns(text, dynamic_entities)
+ #     tokens = [t[0] for t in protected_tokens]
+ #     tags = pos_tag(tokens)
+ #     new_tokens = replace_words_with_synonyms(protected_tokens, tags)
+ #     text = detokenizer.detokenize(new_tokens)
+ #     return restructure_sentences(text)

+ # def process_text(input_text):
+ #     """
+ #     Processes input text to enhance readability by applying all transformations sequentially.
+ #     """
+ #     if input_text:
+ #         humanized = humanize_text(input_text)
+ #         spell_checked = contextual_spell_check(humanized)
+ #         final_output = finalize_formatting(spell_checked)
+ #         return final_output
+ #     return ""

+ # # Gradio Interface
+ # iface = gr.Interface(
+ #     fn=process_text,
+ #     inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
+ #     outputs=gr.Textbox(lines=5),
+ #     title="AI to Humanized Text Converter",
+ #     description="Enter text and get a more humanized, readable output.",
+ # )

+ # iface.launch()
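
Note for anyone re-enabling the code removed above: the app depends on several downloadable NLTK resources, including the brown corpus that is read at import time when building global_freq_dist. Below is a minimal setup sketch, not part of the commit; the resource identifiers are the standard nltk.download names, but exact requirements may vary with the NLTK version in use.

```python
# Setup sketch (assumption, not from this commit): fetch the NLTK data the app uses.
import nltk

for resource in [
    "punkt",                       # word_tokenize / sent_tokenize
    "averaged_perceptron_tagger",  # pos_tag
    "maxent_ne_chunker",           # ne_chunk
    "words",                       # word list used by ne_chunk
    "wordnet",                     # synonym lookup
    "brown",                       # corpus read at import time for the frequency distribution
]:
    nltk.download(resource)
```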