Update transformers lib #1

Merged · 6 commits · Dec 8, 2022

10 changes: 2 additions & 8 deletions blink/biencoder/biencoder.py
@@ -11,13 +11,7 @@
import torch.nn.functional as F
from tqdm import tqdm

from pytorch_transformers.modeling_bert import (
BertPreTrainedModel,
BertConfig,
BertModel,
)

from pytorch_transformers.tokenization_bert import BertTokenizer
from transformers import AutoTokenizer, BertModel

from blink.common.ranker_base import BertEncoder, get_model_obj
from blink.common.optimizer import get_bert_optimizer
@@ -82,7 +76,7 @@ def __init__(self, params, shared=None):
self.NULL_IDX = 0
self.START_TOKEN = "[CLS]"
self.END_TOKEN = "[SEP]"
self.tokenizer = BertTokenizer.from_pretrained(
self.tokenizer = AutoTokenizer.from_pretrained(
params["bert_model"], do_lower_case=params["lowercase"]
)
# init model
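
For reference, a minimal sketch (not part of this PR) of the tokenizer swap made in this file: AutoTokenizer resolves the right tokenizer class from the checkpoint config, so the explicit BertTokenizer import goes away. The bert-base-uncased name below is only an example checkpoint.

from transformers import AutoTokenizer, BertModel

# AutoTokenizer inspects the checkpoint config and returns the matching
# tokenizer class (e.g. a BERT tokenizer for a BERT checkpoint).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
model = BertModel.from_pretrained("bert-base-uncased")

inputs = tokenizer("Michael Jordan played for the Bulls.", return_tensors="pt")
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # torch.Size([1, seq_len, 768])
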
3 changes: 0 additions & 3 deletions blink/biencoder/data_process.py
@@ -10,8 +10,6 @@
from tqdm import tqdm, trange
from torch.utils.data import DataLoader, TensorDataset

from pytorch_transformers.tokenization_bert import BertTokenizer

from blink.biencoder.zeshel_utils import world_to_id
from blink.common.params import ENT_START_TAG, ENT_END_TAG, ENT_TITLE_TAG

@@ -170,7 +168,6 @@ def process_mention_data(
logger.info(
"Label ids : " + " ".join([str(v) for v in sample["label"]["ids"]])
)
logger.info("Src : %d" % sample["src"][0])
logger.info("Label_id : %d" % sample["label_idx"][0])

context_vecs = torch.tensor(
2 changes: 0 additions & 2 deletions blink/biencoder/eval_biencoder.py
@@ -14,8 +14,6 @@

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset

from pytorch_transformers.tokenization_bert import BertTokenizer

from blink.biencoder.biencoder import BiEncoderRanker
import blink.biencoder.data_process as data
import blink.biencoder.nn_prediction as nnquery
15 changes: 5 additions & 10 deletions blink/biencoder/train_biencoder.py
@@ -21,18 +21,13 @@
from collections import OrderedDict

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset

from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
from pytorch_transformers.optimization import WarmupLinearSchedule
from pytorch_transformers.tokenization_bert import BertTokenizer
from pytorch_transformers.modeling_utils import WEIGHTS_NAME
from transformers import WEIGHTS_NAME, get_linear_schedule_with_warmup

from blink.biencoder.biencoder import BiEncoderRanker, load_biencoder
import logging

import blink.candidate_ranking.utils as utils
import blink.biencoder.data_process as data
from blink.biencoder.zeshel_utils import DOC_PATH, WORLDS, world_to_id
from blink.common.optimizer import get_bert_optimizer
from blink.common.params import BlinkParser

@@ -59,7 +54,7 @@ def evaluate(

for step, batch in enumerate(iter_):
batch = tuple(t.to(device) for t in batch)
context_input, candidate_input, _, _ = batch
context_input, candidate_input, _ = batch
with torch.no_grad():
eval_loss, logits = reranker(context_input, candidate_input)

@@ -98,8 +93,8 @@ def get_scheduler(params, optimizer, len_train_data, logger):
num_train_steps = int(len_train_data / batch_size / grad_acc) * epochs
num_warmup_steps = int(num_train_steps * params["warmup_proportion"])

scheduler = WarmupLinearSchedule(
optimizer, warmup_steps=num_warmup_steps, t_total=num_train_steps,
scheduler = get_linear_schedule_with_warmup(
optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_train_steps,
)
logger.info(" Num optimization steps = %d" % num_train_steps)
logger.info(" Num warmup steps = %d", num_warmup_steps)
@@ -225,7 +220,7 @@ def main(params):

for step, batch in enumerate(iter_):
batch = tuple(t.to(device) for t in batch)
context_input, candidate_input, _, _ = batch
context_input, candidate_input, _ = batch
loss, _ = reranker(context_input, candidate_input)

# if n_gpu > 1:
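
A hedged sketch (not part of the diff) of the scheduler rename made above: WarmupLinearSchedule(optimizer, warmup_steps, t_total) becomes get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps) with the same linear warmup then linear decay behaviour. The model, optimizer, and step counts are placeholders.

import torch
from transformers import get_linear_schedule_with_warmup

model = torch.nn.Linear(10, 2)                              # placeholder model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)  # placeholder optimizer

num_train_steps = 1000                                      # placeholder totals
num_warmup_steps = int(num_train_steps * 0.1)               # warmup_proportion = 0.1

# warmup_steps -> num_warmup_steps, t_total -> num_training_steps
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_train_steps,
)
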
27 changes: 13 additions & 14 deletions blink/candidate_ranking/bert_reranking.py
@@ -8,20 +8,19 @@
import os
import numpy as np

from pytorch_transformers.modeling_bert import (
from transformers import (
AdamW,
AutoTokenizer,
BertPreTrainedModel,
BertConfig,
BertModel,
PYTORCH_PRETRAINED_BERT_CACHE,
get_linear_schedule_with_warmup,
)
from pytorch_transformers.tokenization_bert import BertTokenizer
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset
from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss
from tqdm import tqdm

from pytorch_transformers.optimization import AdamW, WarmupLinearSchedule
from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE


class BertForReranking(BertPreTrainedModel):
r"""
@@ -43,9 +42,9 @@ class BertForReranking(BertPreTrainedModel):

``token_type_ids: 0 0 0 0 0 0 0``

Indices can be obtained using :class:`pytorch_transformers.BertTokenizer`.
See :func:`pytorch_transformers.PreTrainedTokenizer.encode` and
:func:`pytorch_transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.
Indices can be obtained using :class:`transformers.AutoTokenizer`.
See :func:`transformers.PreTrainedTokenizer.encode` and
:func:`transformers.PreTrainedTokenizer.convert_tokens_to_ids` for details.
**token_type_ids**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, num_choices, sequence_length)``:
Segment token indices to indicate first and second portions of the inputs.
The second dimension of the input (`num_choices`) indicates the number of choices to score.
@@ -82,7 +81,7 @@ class BertForReranking(BertPreTrainedModel):

Examples::

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')
choices = ["Hello, my dog is cute", "Hello, my cat is amazing"]
input_ids = torch.tensor([tokenizer.encode(s) for s in choices]).unsqueeze(0) # Batch size 1, 2 choices
@@ -268,10 +267,10 @@ def get_scheduler_and_optimizer(self, parameters, train_tensor_data, logger):
correct_bias=False,
)

scheduler = WarmupLinearSchedule(
scheduler = get_linear_schedule_with_warmup(
optimizer,
warmup_steps=num_warmup_steps,
t_total=num_train_optimization_steps,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_train_optimization_steps,
)

logger.info(" Num optimization steps = %d", num_train_optimization_steps)
@@ -294,7 +293,7 @@ def get_model(parameters):

@staticmethod
def get_tokenizer(parameters):
tokenizer = BertTokenizer.from_pretrained(
tokenizer = AutoTokenizer.from_pretrained(
parameters["path_to_model"], do_lower_case=parameters["lowercase_flag"]
)
return tokenizer
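
This file also moves AdamW from pytorch_transformers.optimization to the transformers top level. A rough sketch under that assumption (the linear layer and learning rate are placeholders; note that newer transformers releases deprecate transformers.AdamW in favour of torch.optim.AdamW):

import torch
from transformers import AdamW

# correct_bias=False keeps the original BERT Adam behaviour, matching the
# explicit correct_bias=False already used in this file.
model = torch.nn.Linear(768, 2)  # placeholder for the reranking head
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
optimizer.zero_grad()
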
3 changes: 0 additions & 3 deletions blink/candidate_ranking/train.py
@@ -26,9 +26,6 @@
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler

from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
from pytorch_transformers.tokenization_bert import BertTokenizer

import blink.candidate_retrieval.utils
from blink.candidate_ranking.bert_reranking import BertForReranking
import logging
2 changes: 1 addition & 1 deletion blink/candidate_ranking/utils.py
@@ -14,7 +14,7 @@
import numpy as np

from collections import OrderedDict
from pytorch_transformers.modeling_utils import CONFIG_NAME, WEIGHTS_NAME
from transformers import CONFIG_NAME, WEIGHTS_NAME
from tqdm import tqdm

from blink.candidate_ranking.bert_reranking import BertReranker
9 changes: 1 addition & 8 deletions blink/common/optimizer.py
@@ -8,16 +8,9 @@
import os
import numpy as np

from pytorch_transformers.modeling_bert import (
BertPreTrainedModel,
BertConfig,
BertModel,
)
from pytorch_transformers.tokenization_bert import BertTokenizer
from torch import nn

from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
from pytorch_transformers.optimization import AdamW
from transformers import AdamW


patterns_optimizer = {
2 changes: 1 addition & 1 deletion blink/common/params.py
@@ -215,7 +215,7 @@ def add_training_args(self, args=None):
default=0.1,
type=float,
help="Proportion of training to perform linear learning rate warmup for. "
"E.g., 0.1 = 10% of training.",
"E.g., 0.1 = 10 percent of training.",
)
parser.add_argument(
"--gradient_accumulation_steps",
5 changes: 2 additions & 3 deletions blink/common/ranker_base.py
@@ -26,9 +26,8 @@ def __init__(
self.additional_linear = None

def forward(self, token_ids, segment_ids, attention_mask):
output_bert, output_pooler = self.bert_model(
token_ids, segment_ids, attention_mask
)
output = self.bert_model(input_ids=token_ids, token_type_ids=segment_ids, attention_mask=attention_mask)
output_bert, output_pooler = output.last_hidden_state, output.pooler_output
# get embedding of [CLS] token
if self.additional_linear is not None:
embeddings = output_pooler
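
The forward change above reflects that transformers v4 models return a ModelOutput object by default (return_dict=True) instead of a plain tuple, so the old two-value unpacking is replaced by named attributes. A small illustrative sketch, with bert-base-uncased standing in for the configured checkpoint:

import torch
from transformers import AutoTokenizer, BertModel

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

enc = tokenizer("BLINK links mentions to entities.", return_tensors="pt")
with torch.no_grad():
    output = model(
        input_ids=enc["input_ids"],
        token_type_ids=enc["token_type_ids"],
        attention_mask=enc["attention_mask"],
    )

# Named fields replace tuple unpacking:
sequence_output = output.last_hidden_state  # (batch, seq_len, hidden)
pooled_output = output.pooler_output        # (batch, hidden)
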
20 changes: 3 additions & 17 deletions blink/crossencoder/crossencoder.py
@@ -12,21 +12,7 @@

from collections import OrderedDict
from tqdm import tqdm
from pytorch_transformers.modeling_utils import CONFIG_NAME, WEIGHTS_NAME

from pytorch_transformers.modeling_bert import (
BertPreTrainedModel,
BertConfig,
BertModel,
)

from pytorch_transformers.modeling_roberta import (
RobertaConfig,
RobertaModel,
)

from pytorch_transformers.tokenization_bert import BertTokenizer
from pytorch_transformers.tokenization_roberta import RobertaTokenizer
from transformers import AutoTokenizer, BertModel, RobertaModel, CONFIG_NAME, WEIGHTS_NAME

from blink.common.ranker_base import BertEncoder, get_model_obj
from blink.common.optimizer import get_bert_optimizer
@@ -73,9 +59,9 @@ def __init__(self, params, shared=None):
self.n_gpu = torch.cuda.device_count()

if params.get("roberta"):
self.tokenizer = RobertaTokenizer.from_pretrained(params["bert_model"],)
self.tokenizer = AutoTokenizer.from_pretrained(params["bert_model"],)
else:
self.tokenizer = BertTokenizer.from_pretrained(
self.tokenizer = AutoTokenizer.from_pretrained(
params["bert_model"], do_lower_case=params["lowercase"]
)

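
Both branches above now go through AutoTokenizer, which dispatches on the checkpoint's config, so the BERT/RoBERTa split only differs in whether do_lower_case is passed. A hedged sketch; the roberta-base and bert-base-uncased names are just example checkpoints:

from transformers import AutoTokenizer

# Same call, different checkpoints: AutoTokenizer returns a RoBERTa tokenizer
# for a RoBERTa checkpoint and a BERT tokenizer for a BERT checkpoint.
roberta_tok = AutoTokenizer.from_pretrained("roberta-base")
bert_tok = AutoTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

print(type(roberta_tok).__name__)  # e.g. RobertaTokenizerFast
print(type(bert_tok).__name__)     # e.g. BertTokenizerFast
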
9 changes: 3 additions & 6 deletions blink/crossencoder/train_cross.py
@@ -21,10 +21,7 @@
from collections import OrderedDict

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset

from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE
from pytorch_transformers.optimization import WarmupLinearSchedule
from pytorch_transformers.tokenization_bert import BertTokenizer
from transformers import get_linear_schedule_with_warmup

import blink.candidate_retrieval.utils
from blink.crossencoder.crossencoder import CrossEncoderRanker, load_crossencoder
@@ -148,8 +145,8 @@ def get_scheduler(params, optimizer, len_train_data, logger):
num_train_steps = int(len_train_data / batch_size / grad_acc) * epochs
num_warmup_steps = int(num_train_steps * params["warmup_proportion"])

scheduler = WarmupLinearSchedule(
optimizer, warmup_steps=num_warmup_steps, t_total=num_train_steps,
scheduler = get_linear_schedule_with_warmup(
optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_train_steps,
)
logger.info(" Num optimization steps = %d" % num_train_steps)
logger.info(" Num warmup steps = %d", num_warmup_steps)
2 changes: 1 addition & 1 deletion setup.py
@@ -35,7 +35,7 @@
"numpy>=1.17.2",
"segtok>=1.5.7",
"flair>=0.4.3",
"pytorch-transformers>=1.2.0",
"transformers>=4.0.0",
"colorama>=0.4.3",
"termcolor>=1.1.0",
"faiss-cpu>=1.6.1",
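
With the dependency switched from pytorch-transformers to transformers, a quick smoke test like the one below (not part of the PR, and assuming transformers>=4.0.0 is installed) confirms that the renamed top-level imports used across these files resolve:

# Top-level names this PR now imports from transformers:
from transformers import (
    AutoTokenizer,
    BertModel,
    CONFIG_NAME,
    WEIGHTS_NAME,
    get_linear_schedule_with_warmup,
)
# transformers.AdamW also exists in early 4.x releases but is deprecated in
# newer ones; torch.optim.AdamW is the long-term replacement.
print("transformers imports resolve")
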
20 changes: 20 additions & 0 deletions train_blink_biencoder.sh
@@ -0,0 +1,20 @@
export PYTHONPATH=.

BATCH=32
EPOCH=1
DATA=data/japanese/training
OUTPUT=models/debug

# NOTE: --lowercase is a store_false flag, so passing it sets lowercase to False.
python blink/biencoder/train_biencoder.py \
--output_path $OUTPUT \
--bert_model cl-tohoku/bert-base-japanese-v2 \
--lowercase \
--train_batch_size $BATCH \
--eval_batch_size 32 \
--gradient_accumulation_steps 1 \
--num_train_epochs $EPOCH \
--data_path $DATA \
--shuffle True \
--eval_interval 100 \
--debug