import gradio as gr
from nltk import pos_tag, ne_chunk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.tokenize.treebank import TreebankWordDetokenizer
from nltk.corpus import wordnet, brown
from spellchecker import SpellChecker
import re
import nltk

# Download the NLTK data the pipeline relies on (tokenizer, tagger, NE chunker, WordNet, Brown).
# Note: newer NLTK releases may additionally require 'punkt_tab' and the '_eng'/'_tab'
# variants of the tagger and chunker models.
nltk.download('brown')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('wordnet')

# Preload resources
detokenizer = TreebankWordDetokenizer()
# Brown-corpus word frequencies, used later to rank synonym candidates by how common they are.
global_freq_dist = nltk.FreqDist(w.lower() for w in brown.words())
# Collect named entities (people, organizations, places) so later steps can leave them untouched.
def extract_proper_nouns(text):
    tokens = word_tokenize(text)
    tagged = pos_tag(tokens)
    chunks = ne_chunk(tagged)
    proper_nouns = set()
    for chunk in chunks:
        if hasattr(chunk, 'label') and chunk.label() in ('PERSON', 'ORGANIZATION', 'GPE'):
            entity = " ".join(c[0] for c in chunk)
            proper_nouns.add(entity.lower())
    return proper_nouns
# Mark each token as protected (True) if it occurs inside any detected entity, so it is never rewritten.
def protect_proper_nouns(text, dynamic_entities):
    tokens = word_tokenize(text)
    protected = []
    for token in tokens:
        lower_token = token.lower()
        if any(lower_token in entity for entity in dynamic_entities):
            protected.append((token, True))
        else:
            protected.append((token, False))
    return protected
# Swap eligible words for a more common, single-word WordNet synonym of the same part of speech.
def replace_words_with_synonyms(tokens_with_protection, pos_tags):
    new_tokens = []
    for (token, protected), (_, tag) in zip(tokens_with_protection, pos_tags):
        # Leave protected tokens and punctuation/numbers untouched.
        if protected or not token.isalpha():
            new_tokens.append(token)
            continue
        # Map the Penn Treebank tag to a WordNet part of speech.
        pos = None
        if tag.startswith('JJ'):
            pos = wordnet.ADJ
        elif tag.startswith('RB'):
            pos = wordnet.ADV
        elif tag.startswith('NN'):
            pos = wordnet.NOUN
        elif tag.startswith('VB'):
            pos = wordnet.VERB
        if pos:
            candidates = []
            for syn in wordnet.synsets(token, pos=pos):
                for lemma in syn.lemmas():
                    candidate = lemma.name().replace('_', ' ')
                    # Skip the word itself, multi-word lemmas, and candidates much longer than the original.
                    if candidate.lower() == token.lower() or ' ' in candidate:
                        continue
                    if len(candidate) > len(token) * 1.2:
                        continue
                    candidates.append(candidate)
            if candidates:
                # Prefer the candidate that is most frequent in the Brown corpus.
                best_candidate = max(candidates,
                                     key=lambda x: global_freq_dist[x.lower()],
                                     default=token)
                new_tokens.append(best_candidate)
            else:
                new_tokens.append(token)
        else:
            new_tokens.append(token)
    return new_tokens
# Break very long sentences (over 25 words) at the first comma or semicolon.
def restructure_sentences(text):
    sentences = sent_tokenize(text)
    restructured = []
    for sent in sentences:
        if len(sent.split()) > 25:
            parts = re.split(r'[,;]', sent)
            if len(parts) > 1:
                sent = parts[0] + '. ' + ' '.join(parts[1:])
        restructured.append(sent)
    return ' '.join(restructured)
# Spell-check the text while skipping protected entities and non-alphabetic tokens.
def contextual_spell_check(text):
    protected = protect_proper_nouns(text, extract_proper_nouns(text))
    spell = SpellChecker()
    corrected = []
    for token, protected_flag in protected:
        if protected_flag or not token.isalpha():
            corrected.append(token)
            continue
        correction = spell.correction(token)
        if correction:
            # Preserve the original capitalization pattern.
            if token[0].isupper():
                corrected.append(correction.capitalize())
            else:
                corrected.append(correction.lower())
        else:
            corrected.append(token)
    return detokenizer.detokenize(corrected)
# Tidy spacing around punctuation and apply typographic quotes and dashes.
def finalize_formatting(text):
    text = re.sub(r'\s+([.,!?;:])', r'\1', text)   # no space before punctuation
    text = re.sub(r'([(])\s+', r'\1', text)        # no space after an opening parenthesis
    text = re.sub(r'\s+([)])', r'\1', text)        # no space before a closing parenthesis
    text = re.sub(r'\"(.*?)\"', r'“\1”', text)     # straight quotes -> curly quotes
    text = re.sub(r' -- ', r' — ', text)           # double hyphen -> em dash
    return text.strip()
# Full rewriting pass: protect entities, substitute synonyms, then split overly long sentences.
def humanize_text(text):
    dynamic_entities = extract_proper_nouns(text)
    protected_tokens = protect_proper_nouns(text, dynamic_entities)
    tokens = [t[0] for t in protected_tokens]
    tags = pos_tag(tokens)
    new_tokens = replace_words_with_synonyms(protected_tokens, tags)
    text = detokenizer.detokenize(new_tokens)
    return restructure_sentences(text)
# Gradio callback: humanize, spell-check, then polish the formatting.
def process_text(input_text):
    if input_text:
        humanized = humanize_text(input_text)
        spell_checked = contextual_spell_check(humanized)
        final_output = finalize_formatting(spell_checked)
        return final_output
    return ""
# Gradio Interface
iface = gr.Interface(
    fn=process_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter text here..."),
    outputs=gr.Textbox(lines=5),
    title="AI to Humanized Text Converter",
    description="Enter text and get a more humanized, readable output.",
)

iface.launch()