import numpy as np
import re
import spacy
import openai
from operator import itemgetter


# user input manager class
class input_manager:
    # initialize key dictionary from vector data frame
    def __init__(self, key_df, slim_df, search_tokens):
        self.key_df = key_df
        self.slim_df = slim_df
        self.search_tokens = search_tokens
        self.key = dict(zip(list(key_df.columns), np.zeros(len(key_df.columns))))
        self.nlp = spacy.load("en_core_web_md")

    # translate input text to vector
    def set_input(self, input_cats):
        # need setup to apply correct group tag to values
        # separate known/unknown features
        k_flags = [cat for cat in input_cats if cat in self.key]
        unk_flags = [cat for cat in input_cats if cat not in self.key]
        # process within-feature-class similarity for each unknown input
        if len(unk_flags) > 0:
            outs = []
            # each feature prefix maps to its list of search tokens
            prefix_groups = {"game_type_": 0, "mechanic_": 1, "category_": 2, "family_": 3}
            for word in unk_flags:
                for prefix, idx in prefix_groups.items():
                    if re.match(prefix, word):
                        tok = self.nlp(word.split("_")[-1])
                        mtch = max([(key, key.similarity(tok)) for key in self.search_tokens[idx]], key=itemgetter(1))
                        # if no known match is found (model doesn't recognize the input word),
                        # discard it - other solutions are performance prohibitive
                        if mtch[1] > 0:
                            outs.append(prefix + str(mtch[0]))
                        break
            # once unknowns are processed, rejoin nearest matches with the known flags
            k_flags = list(set(k_flags + outs))
        # preserve global key and output a copy with the input keys activated to 1
        d = self.key.copy()
        for cat in k_flags:
            d[cat] = 1.0
        return d

    def input_parser(self, in_vec):
        # extracting keys from processed vector
        ks = [k for k, v in in_vec.items() if v == 1]
        return ks
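
# Usage sketch for input_manager (illustrative only): key_df is assumed to be the
# one-hot feature frame whose columns name every known tag, slim_df the reduced game
# data, and search_tokens a list of four spaCy token collections (game types,
# mechanics, categories, families) used for nearest-match lookup.
#
#   im = input_manager(key_df, slim_df, search_tokens)
#   vec = im.set_input(["game_type_strategy", "mechanic_deception"])  # one known tag, one fuzzy tag
#   tags = im.input_parser(vec)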

class model_control:
    def __init__(self, apikey, model_id):
        self.api_key = apikey
        openai.api_key = self.api_key
        self.prompt = None
        # look up the fine-tuned model name (legacy openai<1.0 FineTune endpoint)
        self.model = openai.FineTune.retrieve(id=model_id).fine_tuned_model

    def prompt_formatter(self, ks):
        # join the activated tags into the prompt format used at fine-tune time
        self.prompt = ". ".join(ks) + "\n\n###\n\n"

    def call_api(self, status=0):
        # sampling settings vary with retry status; fall back to the
        # status-0 values for any unexpected status
        settings = {0: (0.5, 0.7), 1: (0.4, 0.6), 2: (0.5, 0.8)}
        temp, pres = settings.get(status, (0.5, 0.7))
        answer = openai.Completion.create(
            model=self.model,
            prompt=self.prompt,
            max_tokens=512,
            temperature=temp,
            stop=["END"],
            presence_penalty=pres,
            frequency_penalty=0.5
        )
        return answer['choices'][0]['text']

    def resp_cleanup(self, text):
        # if the completion was cut off mid-sentence, drop the trailing fragment
        if text and text[-1] not in (".", "!", "?"):
            text = " ".join([e + '.' for e in text.split('.')[0:-1] if e])
        # split into sentences and drop any that credit designers/artists/publishers
        sent = re.split(r'([.?!:])', text)
        phrases = ["[Dd]esigned by", "[Dd]esigner of", "[Aa]rt by", "[Aa]rtist of", "[Pp]ublished", "[Pp]ublisher of"]
        pat = re.compile("(?:" + "|".join(phrases) + ")")
        # collapse doubled terminal punctuation left behind by the removal
        fix = re.compile("(?<=[.!?])[.!?]")
        text = re.sub(fix, '', ''.join([s for s in sent if pat.search(s) is None]))
        return text
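

# A minimal sketch of driving model_control on its own, assuming a valid OpenAI API
# key and fine-tune id; the credentials and tag list below are placeholders, not the
# project's real values.
if __name__ == "__main__":
    mc = model_control(apikey="sk-...", model_id="ft-...")  # placeholder credentials
    mc.prompt_formatter(["game_type_strategy", "mechanic_bluffing", "category_fantasy"])
    raw = mc.call_api(status=0)
    print(mc.resp_cleanup(raw))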