Zhu Lin committed
update

- .DS_Store +0 -0
- README.md +6 -5
- app.py +13 -0
- config.json +33 -0
- configuration_bert.py +26 -0
- requirements.txt +11 -0
.DS_Store
ADDED
Binary file (6.15 kB)

README.md
CHANGED
@@ -1,12 +1,13 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: DNABERT-2 demo
+emoji: 🐨
+colorFrom: yellow
+colorTo: green
 sdk: gradio
 sdk_version: 4.38.1
 app_file: app.py
 pinned: false
+license: bigscience-openrail-m
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py
ADDED
@@ -0,0 +1,13 @@
+
+from transformers import AutoTokenizer, AutoModel, pipeline
+from transformers.models.bert.configuration_bert import BertConfig
+import gradio as gr
+
+config = BertConfig.from_pretrained("czl/dnabert2")
+tokenizer = AutoTokenizer.from_pretrained("czl/dnabert2", trust_remote_code=True)
+model = AutoModel.from_pretrained("czl/dnabert2", trust_remote_code=True, config=config)
+
+pipe = pipeline("feature-extraction", model=model, tokenizer=tokenizer)
+
+demo = gr.Interface.from_pipeline(pipe)
+demo.launch()

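For a sense of what the Space exposes: a minimal sketch of calling the feature-extraction pipeline from app.py directly, assuming the custom DNABERT-2 model returns hidden states the way the standard pipeline contract expects (the DNA sequence below is an arbitrary example, not part of this commit):

# Rough usage sketch for the pipeline defined in app.py above.
# The sequence is an arbitrary example; output shape assumes the standard
# feature-extraction pipeline contract: [batch][token][hidden_dim].
seq = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
features = pipe(seq)
print(len(features[0]))     # number of tokens produced by the DNABERT-2 BPE tokenizer
print(len(features[0][0]))  # hidden size, 768 per config.json
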
config.json
ADDED
@@ -0,0 +1,33 @@
+{
+  "_name_or_path": "czl/dnabert2",
+  "alibi_starting_size": 512,
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "auto_map": {
+    "AutoConfig": "configuration_bert.BertConfig",
+    "AutoModel": "bert_layers.BertModel",
+    "AutoModelForMaskedLM": "bert_layers.BertForMaskedLM",
+    "AutoModelForSequenceClassification": "bert_layers.BertForSequenceClassification"
+  },
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 4096,
+  "flash_attn": false
+}

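The auto_map block is what ties this config to the custom code in the model repo: with trust_remote_code=True, the Auto* factories load the listed classes instead of the stock BERT implementations. A short sketch of that resolution, assuming the referenced bert_layers module ships with the czl/dnabert2 repo (it is not part of this commit):

# Sketch: how auto_map is resolved when trust_remote_code=True.
from transformers import AutoConfig, AutoModel

cfg = AutoConfig.from_pretrained("czl/dnabert2", trust_remote_code=True)
# -> configuration_bert.BertConfig, the subclass added in this commit
mdl = AutoModel.from_pretrained("czl/dnabert2", trust_remote_code=True)
# -> bert_layers.BertModel, per the auto_map entry above
print(type(cfg).__name__, type(mdl).__name__)
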
configuration_bert.py
ADDED
@@ -0,0 +1,26 @@
+# Copyright 2022 MosaicML Examples authors
+# SPDX-License-Identifier: Apache-2.0
+
+from transformers import BertConfig as TransformersBertConfig
+
+
+class BertConfig(TransformersBertConfig):
+
+    def __init__(
+        self,
+        alibi_starting_size: int = 512,
+        attention_probs_dropout_prob: float = 0.0,
+        **kwargs,
+    ):
+        """Configuration class for MosaicBert.
+
+        Args:
+            alibi_starting_size (int): Use `alibi_starting_size` to determine how large of an alibi tensor to
+                create when initializing the model. You should be able to ignore this parameter in most cases.
+                Defaults to 512.
+            attention_probs_dropout_prob (float): By default, turn off attention dropout in Mosaic BERT
+                (otherwise, Flash Attention will be off by default). Defaults to 0.0.
+        """
+        super().__init__(
+            attention_probs_dropout_prob=attention_probs_dropout_prob, **kwargs)
+        self.alibi_starting_size = alibi_starting_size

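A minimal sketch of the subclass in use, just to show the two knobs it adds on top of the stock transformers BertConfig (the values passed here are simply the defaults plus the vocab size from config.json):

# Hypothetical local usage of the BertConfig subclass defined above.
from configuration_bert import BertConfig

cfg = BertConfig(alibi_starting_size=512, attention_probs_dropout_prob=0.0, vocab_size=4096)
print(cfg.alibi_starting_size)           # 512: size of the ALiBi tensor built at model init
print(cfg.attention_probs_dropout_prob)  # 0.0: kept at zero so Flash Attention stays usable
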
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+transformers[torch]
+torch
+torchvision
+torchaudio
+einops
+peft
+omegaconf
+evaluate
+accelerate
+gradio
+spaces