Skip to content

Commit

Permalink
remove unnecessary dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
saeeddhqan committed Sep 19, 2024
1 parent e40c509 commit 2bb4ea2
Show file tree
Hide file tree
Showing 13 changed files with 53 additions and 205 deletions.
1 change: 0 additions & 1 deletion maryam.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3

from maryam import __main__
import sys
Expand Down
12 changes: 9 additions & 3 deletions maryam/core/util/iris/meta_search_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from math import trunc

class main:

def __init__(self):
    """Copy the ``framework`` attribute of the ``main`` class onto this instance."""
    # NOTE(review): ``main.framework`` is not assigned anywhere in the visible
    # code - presumably it is attached to the class before instantiation; confirm.
    self.framework = main.framework


def make_cite(self, url: 'URL string') -> 'cite':
urlib = self.framework.urlib(url)
path = urlib.path
Expand All @@ -40,6 +42,7 @@ def make_cite(self, url: 'URL string') -> 'cite':
cite = f"{host}{path}"
return cite


def remove_dups(self, res):
urls = []
new = []
Expand All @@ -50,6 +53,7 @@ def remove_dups(self, res):
new.append(i)
return new


def simple_merge(results) -> 'merging results based on quality of engines':
engines_len = len(results)
merged = []
Expand All @@ -65,14 +69,16 @@ def simple_merge(results) -> 'merging results based on quality of engines':

return merged


def compute_count_consensus(
e: dict(type=list, help='list of search engines sorted by quality'),
l: dict(type=int, help='number of results')) -> 'a list of numbers':
l: dict(type=int, help='number of results'),
) -> 'a list of numbers':
x = len(e)
o = {}
for i in e:
o[i] = trunc(l/x)
l -= l - (l%x)
o[i] = trunc(l / x)
l -= l - (l % x)
if l != 0:
if l < x:
for i in range(l):
Expand Down
11 changes: 5 additions & 6 deletions maryam/core/util/iris/retriever.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
"""
OWASP Maryam!
This program is free software: you can redistribute it and/or modify
Expand Down Expand Up @@ -87,7 +86,7 @@ def __init__(
lowercase=True,
preprocessor=None,
tokenizer=None,
stop_words="english",
stop_words='english',
token_pattern=r"(?u)\b\w\w+\b",
ngram_range=(1, 2),
max_df=1,
Expand Down Expand Up @@ -124,11 +123,13 @@ def __init__(
)
self.vectorizer = vectorizer


def fit(self, df, y=None):
    """Fit the vectorizer on the pages contained in ``df``.

    ``df`` is stored as ``self.metadata``. Every entry of ``df["pages"]``
    is joined with single spaces into one document, and the resulting list
    is handed to ``self.vectorizer.fit_transform``; its result is stored as
    ``self.tfidf_matrix``. ``y`` is accepted but not used.

    Returns:
        This instance, so calls can be chained.
    """
    self.metadata = df
    documents = [' '.join(page) for page in df["pages"]]
    self.tfidf_matrix = self.vectorizer.fit_transform(documents)
    return self


def predict(self, query: str) -> 'OrderedDict':
"""
Compute the top_n closest documents given a query
Expand Down Expand Up @@ -160,10 +161,8 @@ def predict(self, query: str) -> 'OrderedDict':
table = prettytable.PrettyTable(["rank", "index", "title"])
for i in range(len(closest_docs_indices)):
index = closest_docs_indices[i]
# if self.paragraphs:
# article_index = self.paragraphs[int(index)]["index"]
# title = self.metadata.iloc[int(article_index)]["title"]
# else:


title = self.metadata.iloc[int(index)]["title"]
table.add_row([rank, index, title])
rank += 1
Expand Down
82 changes: 0 additions & 82 deletions maryam/core/util/iris/safe_searcher.py

This file was deleted.

5 changes: 5 additions & 0 deletions maryam/core/util/iris/tf_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from collections import Counter
BASEDIR = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../'))


class main:

def __init__(self, docs: 'documents', form: 'documet form. e.g html', without_punc=True):
Expand All @@ -33,18 +34,22 @@ def __init__(self, docs: 'documents', form: 'documet form. e.g html', without_pu
if self.without_punc:
self._punc()


def remove_stopwords(self, rest):
    """Drop stop words and caller-supplied terms from ``self.words``.

    The stop-word list is read from ``data/stopwords.csv`` (resolved
    relative to ``BASEDIR``) and split on commas.

    Args:
        rest: Container of additional terms to exclude; only membership
            tests (``in``) are performed on it.
    """
    stops_path = os.path.join(BASEDIR, '../../', 'data', 'stopwords.csv')
    # A context manager closes the file deterministically instead of
    # leaving the open handle for the garbage collector.
    with open(stops_path) as stops_file:
        # A set gives constant-time membership tests in the filter below.
        stops = set(stops_file.read().split(','))
    self.words = [x for x in self.words if x not in stops and x not in rest]


def _punc(self):
self.words = re.findall(r"[\w\-_#]{2,}", self.docs)


def _counter(self, last):
""" last: number of terms to show in plot """
bow = Counter(self.words)
return bow.most_common(last)


def plot_histogram(self, title, last, should_show=False):
import pandas as pd
import matplotlib.pyplot as plt
Expand Down
14 changes: 10 additions & 4 deletions maryam/core/util/iris/topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,22 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
# core/util/iris/topic.py
# Hatma Suryotrisongko

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
BASEDIR = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../'))

class main:

def __init__(self, inputfile, filetype, keyword, showcharts, verbose):

from dask import dataframe as dd
import json
from gensim.parsing.preprocessing import remove_stopwords
self.stops = open(os.path.join(BASEDIR, '../../', 'data', 'stopwords.csv')).read().split(',')

if verbose == True:
print("\n\n DATASET = reading file : " + inputfile)
Expand All @@ -42,7 +43,7 @@ def __init__(self, inputfile, filetype, keyword, showcharts, verbose):
print("\n\n csv file (before preprocessing) = ")
print(tmp4)

self.corpus = tmp4[0].str.lower().apply(remove_stopwords).to_numpy()
self.corpus = tmp4[0].str.lower().apply(self.remove_stopwords).to_numpy()

elif filetype == "json":
with open(inputfile) as json_file:
Expand All @@ -55,7 +56,7 @@ def __init__(self, inputfile, filetype, keyword, showcharts, verbose):
print(tmp)

tmp['td'] = tmp['t'] + ' ' + tmp['d']
self.corpus = tmp['td'].str.lower().apply(remove_stopwords).to_numpy()
self.corpus = tmp['td'].str.lower().apply(self.remove_stopwords).to_numpy()

else:
print('ERROR, only accept csv or json file!')
Expand All @@ -73,6 +74,11 @@ def __init__(self, inputfile, filetype, keyword, showcharts, verbose):
pd.Series([len(e.split()) for e in self.corpus]).hist()
plt.show()


def remove_stopwords(self, text):
    """Return ``text`` with every stop word removed.

    The text is split on whitespace, tokens found in ``self.stops`` are
    dropped, and the remaining tokens are re-joined with single spaces.

    Args:
        text: The string to filter.

    Returns:
        The filtered string; empty if no token survives.
    """
    # Iterating over ``text`` directly walks it character by character, which
    # strips single letters listed as stop words (e.g. every "a") rather than
    # whole words, so filter whitespace-separated tokens instead.
    stops = set(self.stops)
    return ' '.join(word for word in text.split() if word not in stops)


def run_sklearn_cluster_kmeans(self, selected_pretrained_model, showcharts, verbose):

from sklearn.cluster import KMeans
Expand Down
52 changes: 0 additions & 52 deletions maryam/core/util/iris/word_cloud.py

This file was deleted.

2 changes: 1 addition & 1 deletion maryam/modules/iris/iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
'description': 'Iris is a built-in meta search engine.',
'comments': ('It should be note that this is a beta version and has many bugs!',),
'contributors': 'Aman, Dimitris, Divya, Vikas, Kunal',
'sources': ('google', 'bing', 'duckduckgo', 'etools', 'startpage', 'searx', 'yahoo'),
'sources': ('google', 'bing', 'duckduckgo', 'etools', 'startpage', 'yahoo'),
'options': (
('query', None, True, 'Query string', '-q', 'store', str),
),
Expand Down
12 changes: 6 additions & 6 deletions maryam/modules/iris/iris_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
"""

meta = {
'name': 'Iris_Cluster',
'name': 'Iris Cluster',
'author': 'Shaad',
'version': '0.1',
'description': 'Get Iris Search result and clustered results for your query',
'required': ('kneed', 'mlxtend, numpy, sklearn'),
'required': ('kneed', 'mlxtend', 'numpy', 'sklearn'),
'options': (
('query', None, True, 'Query string', '-q', 'store', str),
),
Expand Down Expand Up @@ -51,7 +51,7 @@ def module_run(self):

print('\n\nCLUSTER RESULT: ')
for index, title in enumerate(output):
print('\n')
print(f"CLUSTER {index+1}")
print(f"TITLE: {title}")
print(' '+'\n '.join(output[title]))
print('\n')
print(f"CLUSTER {index+1}")
print(f"TITLE: {title}")
print(' '+'\n '.join(output[title]))
2 changes: 1 addition & 1 deletion maryam/modules/iris/sentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def module_api(self):
return
DATA = loads(file.read())
if key not in DATA and key != None:
self.error("The key doesn't exists", 'module_api', 'iris/sentiment')
self.error('The key doesn\'t exists', 'module_api', 'iris/sentiment')
return
if key != None:
DATA = DATA[key]
Expand Down
2 changes: 1 addition & 1 deletion maryam/modules/iris/topicmodeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,4 @@ def module_api(self):
def module_run(self):
output = module_api(self)
self.output("\n\nOutput = \n")
self.output( output )
self.output( output )
Loading

0 comments on commit 2bb4ea2

Please sign in to comment.