Merge pull request #553 from chenyushuo/master

Merge 0.1.x into master

2017pxy authored Dec 6, 2020
2 parents c8f1ca7 + 90d5c3f commit cedd7ea
Showing 29 changed files with 18,720 additions and 467 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,45 @@
name: RecBole tests

on:
- pull_request

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install dgl
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# Use "python -m pytest" instead of "pytest" to fix imports
- name: Test metrics
run: |
python -m pytest -v tests/metrics
- name: Test evaluation_setting
run: |
python -m pytest -v tests/evaluation_setting
- name: Test model
run: |
python -m pytest -v tests/model/test_model_auto.py
- name: Test config
run: |
python -m pytest -v tests/config/test_config.py
export PYTHONPATH=.
python tests/config/test_command_line.py --use_gpu=False --valid_metric=Recall@10 --split_ratio=[0.7,0.2,0.1] --metrics=['Recall@10'] --epochs=200 --eval_setting='LO_RS' --learning_rate=0.3
- name: Test evaluation_setting
run: |
python -m pytest -v tests/evaluation_setting
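
The comment inside this workflow is its key detail: invoking pytest through `python -m` prepends the working directory to `sys.path`, so the in-repo `recbole` package is importable without `pip install -e .`, which bare `pytest` does not guarantee. A minimal sketch of reproducing one CI step locally on a source checkout (an assumption for illustration, not part of the commit):

```python
# Hypothetical local reproduction of the "Test metrics" step.
# `python -m pytest` adds the current directory to sys.path; plain `pytest` does not.
import subprocess
import sys

subprocess.run([sys.executable, '-m', 'pytest', '-v', 'tests/metrics'], check=True)
```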
2 changes: 2 additions & 0 deletions recbole/data/dataloader/knowledge_dataloader.py
@@ -182,6 +182,8 @@ def _next_batch_data(self):
elif self.state == KGDataLoaderState.RS:
return self.general_dataloader._next_batch_data()
elif self.state == KGDataLoaderState.RSKG:
if self.kg_dataloader.pr >= self.kg_dataloader.pr_end:
self.kg_dataloader.pr = 0
kg_data = self.kg_dataloader._next_batch_data()
rec_data = self.general_dataloader._next_batch_data()
rec_data.update(kg_data)
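
The two added lines make the KG dataloader wrap around: in `RSKG` state every recommendation batch is paired with a KG batch, and since the KG side is usually shorter, its pointer `pr` is reset to 0 once exhausted. A minimal sketch of the same pairing behavior (toy data, not RecBole's classes):

```python
# The reset-to-zero fix is equivalent to cycling the shorter KG stream against
# the RS stream, which alone decides when the epoch ends.
from itertools import cycle

rec_batches = ['rec0', 'rec1', 'rec2', 'rec3', 'rec4']
kg_batches = cycle(['kg0', 'kg1'])  # wraps around, like `pr = 0`

for rec, kg in zip(rec_batches, kg_batches):
    print(rec, kg)  # rec0 kg0 / rec1 kg1 / rec2 kg0 / rec3 kg1 / rec4 kg0
```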
2 changes: 1 addition & 1 deletion recbole/data/dataloader/neg_sample_mixin.py
@@ -29,7 +29,7 @@ class NegSampleMixin(AbstractDataLoader):
batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``.
dl_format (InputType, optional): The input type of dataloader. Defaults to
:obj:`~recbole.utils.InputType.POINTWISE`.
shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaluts to ``False``.
shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaults to ``False``.
"""
dl_type = DataLoaderType.NEGSAMPLE

2 changes: 1 addition & 1 deletion recbole/data/dataloader/sequential_dataloader.py
@@ -143,7 +143,7 @@ def augmentation(self, uid_list, item_list_index, target_index, item_list_length
new_dict = {
self.uid_field: uid_list,
self.item_list_field: np.zeros((new_length, self.max_item_list_len), dtype=np.int64),
self.time_list_field: np.zeros((new_length, self.max_item_list_len), dtype=np.int64),
self.time_list_field: np.zeros((new_length, self.max_item_list_len)),
self.target_iid_field: self.dataset.inter_feat[self.iid_field][target_index].values,
self.target_time_field: self.dataset.inter_feat[self.time_field][target_index].values,
self.item_list_length_field: item_list_length,
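
Dropping `dtype=np.int64` for the time-list matrix matters because timestamps need not be integers; `np.zeros` then defaults to `float64`, which stores fractional values instead of truncating them. A quick illustration (the motivation is inferred from the field's content):

```python
import numpy as np

ts = 1512345678.9                       # a fractional timestamp
int_buf = np.zeros((1, 3), dtype=np.int64)
float_buf = np.zeros((1, 3))            # default dtype: float64

int_buf[0, 0] = ts                      # silently truncated to 1512345678
float_buf[0, 0] = ts                    # stored exactly as 1512345678.9
```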
56 changes: 52 additions & 4 deletions recbole/data/dataset/dataset.py
@@ -55,11 +55,16 @@ class Dataset(object):
Specially, if feature is loaded from Arg ``additional_feat_suffix``, its source has type str,
which is the suffix of its local file (also the suffix written in Arg ``additional_feat_suffix``).
field2id_token (dict): Dict mapping feature name (str) to a list, which stores the original token of
this feature. For example, if ``test`` is token-like feature, ``token_a`` is remapped to 1, ``token_b``
field2id_token (dict): Dict mapping feature name (str) to a :class:`np.ndarray`, which stores the original token
of this feature. For example, if ``test`` is token-like feature, ``token_a`` is remapped to 1, ``token_b``
is remapped to 2. Then ``field2id_token['test'] = ['[PAD]', 'token_a', 'token_b']``. (Note that 0 is
always PADDING for token-like features.)
field2token_id (dict): Dict mapping feature name (str) to a dict, which stores the token remap table
of this feature. For example, if ``test`` is token-like feature, ``token_a`` is remapped to 1, ``token_b``
is remapped to 2. Then ``field2token_id['test'] = {'[PAD]': 0, 'token_a': 1, 'token_b': 2}``.
(Note that 0 is always PADDING for token-like features.)
field2seqlen (dict): Dict mapping feature name (str) to its sequence length (int).
For sequence features, their length can be either set in config,
or set to the max sequence length of this feature.
@@ -116,6 +121,7 @@ def _get_preset(self):
self.field2type = {}
self.field2source = {}
self.field2id_token = {}
self.field2token_id = {}
self.field2seqlen = self.config['seq_len'] or {}
self._preloaded_weight = {}
self.benchmark_filename_list = self.config['benchmark_filename']
@@ -897,11 +903,13 @@ def _remap(self, remap_list):
tokens, split_point = self._concat_remaped_tokens(remap_list)
new_ids_list, mp = pd.factorize(tokens)
new_ids_list = np.split(new_ids_list + 1, split_point)
mp = ['[PAD]'] + list(mp)
mp = np.array(['[PAD]'] + list(mp))
token_id = {t: i for i, t in enumerate(mp)}

for (feat, field, ftype), new_ids in zip(remap_list, new_ids_list):
if (field not in self.field2id_token):
if field not in self.field2id_token:
self.field2id_token[field] = mp
self.field2token_id[field] = token_id
if ftype == FeatureType.TOKEN:
feat[field] = new_ids
elif ftype == FeatureType.TOKEN_SEQ:
@@ -1010,6 +1018,46 @@ def copy_field_property(self, dest_field, source_field):
self.field2source[dest_field] = self.field2source[source_field]
self.field2seqlen[dest_field] = self.field2seqlen[source_field]

@dlapi.set()
def token2id(self, field, tokens):
"""Map external tokens to internal ids.
Args:
field (str): Field of external tokens.
tokens (str, list or np.ndarray): External tokens.
Returns:
int or np.ndarray: The internal ids of external tokens.
"""
if isinstance(tokens, str):
if tokens in self.field2token_id[field]:
return self.field2token_id[field][tokens]
else:
raise ValueError('token [{}] does not exist'.format(tokens))
elif isinstance(tokens, (list, np.ndarray)):
return np.array([self.token2id(field, token) for token in tokens])
else:
raise TypeError('The type of tokens [{}] is not supported'.format(type(tokens)))

@dlapi.set()
def id2token(self, field, ids):
"""Map internal ids to external tokens.
Args:
field (str): Field of internal ids.
ids (int, list, np.ndarray or torch.Tensor): Internal ids.
Returns:
str or np.ndarray: The external tokens of internal ids.
"""
try:
return self.field2id_token[field][ids]
except IndexError:
if isinstance(ids, list):
raise ValueError('[{}] is not a one-dimensional list'.format(ids))
else:
raise ValueError('[{}] is not a valid id'.format(ids))

@property
@dlapi.set()
def user_num(self):
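
The new `field2token_id` dict and the switch of `field2id_token` to an `np.ndarray` give the dataset fast lookups in both directions, and the ndarray supports the fancy indexing that `id2token` relies on for list/array/tensor inputs. A self-contained sketch of the paired structures (toy values, mirroring the docstring example):

```python
import numpy as np

# id -> token via array indexing; token -> id via the inverse dict.
field2id_token = {'test': np.array(['[PAD]', 'token_a', 'token_b'])}
field2token_id = {'test': {t: i for i, t in enumerate(field2id_token['test'])}}

assert field2token_id['test']['token_b'] == 2                           # token2id path
assert list(field2id_token['test'][[1, 2]]) == ['token_a', 'token_b']   # id2token path
# The second lookup works because an ndarray (unlike a plain list) accepts
# a whole list of indices at once.
```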
2 changes: 1 addition & 1 deletion recbole/data/dataset/kg_dataset.py
@@ -353,7 +353,7 @@ def _remap_ID_all(self):
item_tokens = self._get_rec_item_token()
super()._remap_ID_all()
self._sort_remaped_entities(item_tokens)
self.field2id_token[self.relation_field].append('[UI-Relation]')
self.field2id_token[self.relation_field] = np.append(self.field2id_token[self.relation_field], '[UI-Relation]')

@property
@dlapi.set()
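
This line had to change together with the ndarray switch in `dataset.py`: ndarrays are fixed-size, so there is no in-place `.append`, and `np.append` returns a new array that must be assigned back. A one-line illustration (toy relation list):

```python
import numpy as np

relations = np.array(['[PAD]', 'rel_a'])
relations = np.append(relations, '[UI-Relation]')  # new array: ['[PAD]', 'rel_a', '[UI-Relation]']
```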
3 changes: 1 addition & 2 deletions recbole/model/general_recommender/dmf.py
@@ -170,8 +170,7 @@ def get_item_embedding(self):
col = interaction_matrix.col
i = torch.LongTensor([row, col])
data = torch.FloatTensor(interaction_matrix.data)
item_matrix = torch.sparse.FloatTensor(i, data).to(self.device).transpose(0, 1)

item_matrix = torch.sparse.FloatTensor(i, data, torch.Size(interaction_matrix.shape)).to(self.device).transpose(0, 1)
item = torch.sparse.mm(item_matrix, self.item_linear.weight.t())

item = self.item_fc_layers(item)
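
Passing an explicit `torch.Size` fixes a shape bug: without it, the sparse constructor infers the shape from the largest index present, so users or items with no interactions at the tail of the matrix would be cut off. A minimal demonstration (the legacy `torch.sparse.FloatTensor` constructor is kept for parity with the diff; newer PyTorch prefers `torch.sparse_coo_tensor`):

```python
import torch

i = torch.LongTensor([[0, 1], [0, 1]])  # nonzeros at (0, 0) and (1, 1)
v = torch.FloatTensor([1.0, 1.0])

inferred = torch.sparse.FloatTensor(i, v)                      # shape (2, 2): tail dropped
explicit = torch.sparse.FloatTensor(i, v, torch.Size([4, 3]))  # true shape (4, 3) preserved
print(inferred.shape, explicit.shape)
```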
12 changes: 10 additions & 2 deletions recbole/model/general_recommender/fism.py
@@ -14,12 +14,13 @@
https://github.com/AaronHeee/Neural-Attentive-Item-Similarity-Model
"""

from logging import getLogger

import torch
import torch.nn as nn
from torch.nn.init import normal_

from recbole.model.abstract_recommender import GeneralRecommender
from recbole.utils import InputType
from torch.nn.init import normal_


class FISM(GeneralRecommender):
@@ -36,6 +37,8 @@ def __init__(self, config, dataset):

# load dataset info
self.LABEL = config['LABEL_FIELD']
self.logger = getLogger()

# get all users' history interaction information. The history item
# matrix is padded to the maximum number of a user's interactions
self.history_item_matrix, self.history_lens, self.mask_mat = self.get_history_info(dataset)
@@ -49,6 +52,11 @@ def __init__(self, config, dataset):
# split the too large dataset into the specified pieces
if self.split_to > 0:
self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
else:
self.logger.warning('Pay attention!! `split_to` is set to 0. If you run into an OOM error in this case, ' + \
'you need to increase it until the error disappears. For example, ' + \
'you can set it on the command line such as `--split_to=5`')


# define layers and loss
# construct source and destination item embedding matrix
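
The `split_to` mechanism that the new warning refers to scores the item set in pieces rather than as one tensor, trading a few extra forward passes for a smaller peak memory footprint. A sketch of the chunking itself (toy sizes):

```python
import torch

n_items, split_to = 10, 3
for piece in torch.chunk(torch.arange(n_items), split_to):
    print(piece)  # tensor([0, 1, 2, 3]), tensor([4, 5, 6, 7]), tensor([8, 9])
```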
11 changes: 7 additions & 4 deletions recbole/model/general_recommender/gcmc.py
@@ -143,10 +143,13 @@ def get_norm_adj_mat(self):
# build adj matrix
A = sp.dok_matrix((self.n_users + self.n_items,
self.n_users + self.n_items), dtype=np.float32)
A = A.tolil()
A[:self.n_users, self.n_users:] = self.interaction_matrix
A[self.n_users:, :self.n_users] = self.interaction_matrix.transpose()
A = A.todok()
inter_M = self.interaction_matrix
inter_M_t = self.interaction_matrix.transpose()
data_dict = dict(zip(zip(inter_M.row, inter_M.col+self.n_users),
[1]*inter_M.nnz))
data_dict.update(dict(zip(zip(inter_M_t.row+self.n_users, inter_M_t.col),
[1]*inter_M_t.nnz)))
A._update(data_dict)
# norm adj matrix
sumArr = (A > 0).sum(axis=1)
# add epsilon to avoid divide-by-zero warning
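
This rewrite (repeated below in LightGCN, NGCF, and SpectralCF) replaces LIL block slicing with a direct write of COO coordinates into the DOK matrix's underlying dict, which is substantially faster for large graphs; it is also why NGCF's interaction matrix switches from CSR to COO below, since the construction reads `.row`/`.col`. A hedged sketch of the same pattern on a toy matrix (`A._update` is private scipy API, as in the diff; `dict.update(A, ...)` is the equivalent raw write, relying on `dok_matrix` subclassing `dict`):

```python
import numpy as np
import scipy.sparse as sp

# Build A = [[0, R], [R^T, 0]] by writing (row, col) -> 1 straight into the dict.
n_users, n_items = 3, 2
R = sp.coo_matrix(([1, 1], ([0, 2], [1, 0])), shape=(n_users, n_items), dtype=np.float32)

A = sp.dok_matrix((n_users + n_items, n_users + n_items), dtype=np.float32)
data = dict(zip(zip(R.row, R.col + n_users), [1] * R.nnz))        # top-right block
data.update(dict(zip(zip(R.col + n_users, R.row), [1] * R.nnz)))  # bottom-left block
dict.update(A, data)  # bypasses per-element __setitem__, like A._update(data)

print(A.toarray())
```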
11 changes: 7 additions & 4 deletions recbole/model/general_recommender/lightgcn.py
@@ -86,10 +86,13 @@ def get_norm_adj_mat(self):
# build adj matrix
A = sp.dok_matrix((self.n_users + self.n_items,
self.n_users + self.n_items), dtype=np.float32)
A = A.tolil()
A[:self.n_users, self.n_users:] = self.interaction_matrix
A[self.n_users:, :self.n_users] = self.interaction_matrix.transpose()
A = A.todok()
inter_M = self.interaction_matrix
inter_M_t = self.interaction_matrix.transpose()
data_dict = dict(zip(zip(inter_M.row, inter_M.col+self.n_users),
[1]*inter_M.nnz))
data_dict.update(dict(zip(zip(inter_M_t.row+self.n_users, inter_M_t.col),
[1]*inter_M_t.nnz)))
A._update(data_dict)
# norm adj matrix
sumArr = (A > 0).sum(axis=1)
# add epsilon to avoid divide-by-zero warning
4 changes: 4 additions & 0 deletions recbole/model/general_recommender/nais.py
@@ -65,6 +65,10 @@ def __init__(self, config, dataset):
if self.split_to > 0:
self.logger.info('split the n_items to {} pieces'.format(self.split_to))
self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
else:
self.logger.warning('Pay attention!! `split_to` is set to 0. If you run into an OOM error in this case, ' + \
'you need to increase it until the error disappears. For example, ' + \
'you can set it on the command line such as `--split_to=5`')

# define layers and loss
# construct source and destination item embedding matrix
13 changes: 8 additions & 5 deletions recbole/model/general_recommender/ngcf.py
@@ -73,7 +73,7 @@ def __init__(self, config, dataset):
super(NGCF, self).__init__(config, dataset)

# load dataset info
self.interaction_matrix = dataset.inter_matrix(form='csr').astype(np.float32)
self.interaction_matrix = dataset.inter_matrix(form='coo').astype(np.float32)

# load parameters info
self.embedding_size = config['embedding_size']
@@ -117,10 +117,13 @@ def get_norm_adj_mat(self):
"""
# build adj matrix
A = sp.dok_matrix((self.n_users + self.n_items, self.n_users + self.n_items), dtype=np.float32)
A = A.tolil()
A[:self.n_users, self.n_users:] = self.interaction_matrix
A[self.n_users:, :self.n_users] = self.interaction_matrix.transpose()
A = A.todok()
inter_M = self.interaction_matrix
inter_M_t = self.interaction_matrix.transpose()
data_dict = dict(zip(zip(inter_M.row, inter_M.col + self.n_users),
[1] * inter_M.nnz))
data_dict.update(dict(zip(zip(inter_M_t.row + self.n_users, inter_M_t.col),
[1] * inter_M_t.nnz)))
A._update(data_dict)
# norm adj matrix
sumArr = (A > 0).sum(axis=1)
diag = np.array(sumArr.flatten())[0] + 1e-7 # add epsilon to avoid divide-by-zero warning
9 changes: 6 additions & 3 deletions recbole/model/general_recommender/pop.py
@@ -2,7 +2,10 @@
# @Time : 2020/8/11 9:57
# @Author : Zihan Lin
# @Email : [email protected]

# UPDATE
# @Time : 2020/11/9
# @Author : Zihan Lin
# @Email : [email protected]
r"""
Pop
################################################
@@ -44,8 +47,8 @@ def calculate_loss(self, interaction):
def predict(self, interaction):

item = interaction[self.ITEM_ID]
result = self.item_cnt[item, :] / self.max_cnt
return result
result = torch.true_divide(self.item_cnt[item, :], self.max_cnt)
return result.squeeze()

def full_sort_predict(self, interaction):
batch_user_num = interaction[self.USER_ID].shape[0]
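
`item_cnt` holds integer counts, so plain `/` risks integer (floor) division on older PyTorch versions; `torch.true_divide` guarantees a floating-point result regardless of dtype, and the added `.squeeze()` collapses the `(batch, 1)` slice to the `(batch,)` shape the evaluator expects. A tiny check (shapes assumed from the surrounding code):

```python
import torch

cnt = torch.tensor([[3], [2]])           # integer counts, shape (2, 1)
max_cnt = torch.tensor(4)

scores = torch.true_divide(cnt, max_cnt).squeeze()
print(scores)                            # tensor([0.7500, 0.5000]), shape (2,)
```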
11 changes: 7 additions & 4 deletions recbole/model/general_recommender/spectralcf.py
@@ -96,10 +96,13 @@ def get_laplacian_matrix(self):
# build adj matrix
A = sp.dok_matrix((self.n_users + self.n_items,
self.n_users + self.n_items), dtype=np.float32)
A = A.tolil()
A[:self.n_users, self.n_users:] = self.interaction_matrix
A[self.n_users:, :self.n_users] = self.interaction_matrix.transpose()
A = A.todok()
inter_M = self.interaction_matrix
inter_M_t = self.interaction_matrix.transpose()
data_dict = dict(zip(zip(inter_M.row, inter_M.col+self.n_users),
[1]*inter_M.nnz))
data_dict.update(dict(zip(zip(inter_M_t.row+self.n_users, inter_M_t.col),
[1]*inter_M_t.nnz)))
A._update(data_dict)

# norm adj matrix
sumArr = (A > 0).sum(axis=1)
2 changes: 1 addition & 1 deletion recbole/sampler/sampler.py
@@ -83,7 +83,7 @@ def sample_by_key_ids(self, key_ids, num, used_ids):
key_num = len(key_ids)
total_num = key_num * num
value_ids = np.zeros(total_num, dtype=np.int64)
used_id_list = np.repeat(used_ids, num)
used_id_list = np.tile(used_ids, num)
for i, used_ids in enumerate(used_id_list):
cur = self.random()
while cur in used_ids:
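
This one-word change fixes a real sampling bug. Judging from the fix, `value_ids` is filled round-robin over the keys (all keys once, then again, `num` times), so each slot's forbidden set must follow the same order: `np.tile` repeats the whole sequence, while `np.repeat` duplicates each element in place and paired slots with the wrong keys' used ids. Illustrated with toy used-id sets:

```python
import numpy as np

used = np.array(['u0_set', 'u1_set'])
print(np.repeat(used, 2))  # ['u0_set' 'u0_set' 'u1_set' 'u1_set'] -- old, wrong pairing
print(np.tile(used, 2))    # ['u0_set' 'u1_set' 'u0_set' 'u1_set'] -- matches the round-robin layout
```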