
Commit 867253f

Supplement and organize the dependency requirements.txt files and README.md files for each kernel; Unified the startup method of all kernels. (X-lab2017#1589)
1 parent 54478a9 commit 867253f

14 files changed, +149 -60 lines

Dockerfile

+16-16
@@ -1,21 +1,21 @@
-FROM continuumio/miniconda3
-
-LABEL maintainer="Yike Cheng<[email protected]>"
-
-RUN mkdir python_kernel \
-&& mkdir python_kernel/notebook \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ easydict==1.9 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ py2neo==2021.2.3 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ plotly==5.9.0 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ clickhouse-driver==0.2.3 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ numpy==1.23.2 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ jupyterlab==3.4.5 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ matplotlib==3.5.3 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ pandas==1.4.3 \
-&& pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ pyyaml==6.0
+ARG BASE_IMAGE
+
+FROM ${BASE_IMAGE}
+# FROM registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0
+# FROM continuumio/miniconda3 # LABEL maintainer="Yike Cheng<[email protected]>"
+
+USER root
+
+RUN mkdir -p /python_kernel/notebook
 
 WORKDIR /python_kernel/notebook
 
+ARG KER_REL_PATH # Kernel Relative Path e.g. './pycjs'
+
+COPY ${KER_REL_PATH}/requirements.txt ${KER_REL_PATH}/requirements.txt
+
+RUN pip install -r ${KER_REL_PATH}/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
+
 EXPOSE 8888
 
-CMD jupyter lab --notebook-dir=/python_kernel/notebook --ip='*' --port=8888 --allow-root --no-browser
+CMD jupyter lab --notebook-dir=${WORKDIR} --ip='*' --port=8888 --allow-root --no-browser

notebook/node_vm2_pycaller.ipynb

+4-4
@@ -636,7 +636,7 @@
 "\n",
 "def processTechFieldRepoOpenrank(options, title):\n",
 " data = openDigger.index.openrank.getRepoOpenrank(\n",
-" dict(**baseOptions, \n",
+" dict(baseOptions, \n",
 " **dict(options)\n",
 " )\n",
 " )\n",
@@ -880,7 +880,7 @@
 " }\n",
 "def processTechFieldRepoActivity(options, title):\n",
 " data = openDigger.index.activity.getRepoActivity(\n",
-" dict(**baseOptions, \n",
+" dict(baseOptions, \n",
 " **dict(options)\n",
 " )\n",
 " )\n",
@@ -1121,10 +1121,10 @@
 " \"type\": 'scatter'\n",
 " }\n",
 "def processRegionsCompanyRepoActivity(options, title):\n",
-" data = openDigger.index.activity.getRepoActivity({\n",
+" data = openDigger.index.activity.getRepoActivity(dict({\n",
 " \"startYear\": startYear, \"endYear\": endYear, \"startMonth\": startMonth, \"endMonth\": endMonth,\n",
 " \"groupBy\": 'Company', \"groupTimeRange\": 'year', \"order\": 'DESC',\n",
-" })\n",
+" }, **options))\n",
 " \n",
 " data_records = pd.DataFrame(data).to_dict('records')\n",
 " data_periodranks = openDigger.getRank(data_records, lambda x: x[\"name\"], lambda x: x[\"activity\"])\n",

package.json

+1
@@ -11,6 +11,7 @@
 "build": "tsc",
 "notebook": "npm run build && docker pull registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0 && docker run -it --rm -p 8888:8888 -v $(pwd):/home/node/notebook registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0",
 "notebook:win": "npm run build && docker pull registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0 && docker run -it --rm -p 8888:8888 -v %cd%:/home/node/notebook registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0",
+"notebook-pycjs": "npm run build && docker build --build-arg KER_REL_PATH=./pycjs --build-arg BASE_IMAGE=registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0 -t opendigger-jupyter-python:1.0 . && docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v .:/python_kernel/notebook opendigger-jupyter-python:1.0",
 "pull-label-file-test": "tsc && node lib/ci/pull_label_file_test.js",
 "cron": "npm run build && node --max-old-space-size=25600 lib/cron/index.js",
 "test": "CLICKHOUSE_HOST=http://ci.open-digger.cn:8123 CLICKHOUSE_USERNAME=default CLICKHOUSE_PASSWORD= mocha"

pycjs/README.md

+59
@@ -0,0 +1,59 @@
+# Getting Started
+
+## If you want to do some data analysis work:
+Start your ClickHouse container, which should be set up as described in [Clickhouse-sample-data](../sample_data/README.md)
+
+1. Clone OpenDigger `git clone https://github.com/X-lab2017/open-digger.git`
+
+2. Enter the repo path `cd open-digger`
+
+Install the necessary packages `npm install`.
+
+3. Go to the `src` folder (pycjs does not implement any bottom-layer details itself) in the open-digger root directory and create a file named `local_config.py` (this file has already been added to the `.gitignore` file) for the Python kernel with the following contents:
+
+```python
+local_config = {
+    'db': {
+        'clickhouse': {
+            'host':'172.17.0.1',
+            'user':'default'
+        },
+        'neo4j':{
+            'port': '7687',
+        }
+    }
+}
+```
+The `host` above is the host of the ClickHouse server. We can find it using `docker inspect container_name` (the container name is the one set by `docker run --name xxx`) and copy the `Gateway`, like this:
+
+```shell
+$ docker inspect container_name | grep Gateway
+"Gateway": "172.17.0.1",
+"IPv6Gateway": "",
+"Gateway": "172.17.0.1",
+"IPv6Gateway": "",
+```
+If you use your own data, you can also change the `host` field to your own host IP.
+
+Return to the repo path `cd open-digger`.
+
+Build the TypeScript with `npm run build`. Since `npm run build` is required to activate every settings change, the pycjs kernel provides `npm run notebook-pycjs`, which executes the *npm run build, docker build and docker run* commands automatically instead of running them manually step by step as below.
+
+4. Use `docker build --build-arg KER_REL_PATH='./pycjs' --build-arg BASE_IMAGE='registry.cn-beijing.aliyuncs.com/open-digger/open-digger-js-notebook:1.0' -t opendigger-jupyter-python:1.0 $(pwd)` to build a docker image; this image is based on `miniconda`. You can check the `Dockerfile` in the root directory.
+
+> If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`.
+>
+> **Notice:** Pathnames of directories like "pwd" may use `\` to join the directory in some versions of Windows. We recommend using absolute paths.
+
+5. Then we can use `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to create and run the container.
+
+6. Open the link in the console log, like `http://127.0.0.1:8888/lab?token=xxxxx`.
+
+7. If the source code under the `python` folder has changed, you need to stop the notebook docker with `docker stop python_notebook_name` and restart the notebook kernel with `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to reload the source code.
+
+8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring!
+Attention: you need to do this work in the `notebook` folder or another parallel folder. If you run in the root directory, it won't work because of Python import rules.
+
+## If you are a developer:
+
+You can also create a `workspace.py` in the `python` folder and run it.

pycjs/requirements.txt

+4-2
@@ -1,8 +1,10 @@
+clickhouse-driver>=0.2.8
 ipynbname==2023.2.0.0
 ipython==8.0.1
 ipython-genutils==0.2.0
 jupyterlab>=3.2.8
 matplotlib>=3.5.3
 node-vm2==0.4.7
-numpy>=1.21.5
-pandas>=1.4.4
+numpy>=1.23.2
+pandas>=1.4.3
+tabulate==0.9.0

python/README.md

+4-4
@@ -22,7 +22,7 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
 }
 }
 ```
-the `host` above is the host of the ClickHouse server. We can find it using `docker inspect containert_name`, and copy the `Gateway` like this:
+the `host` above is the host of the ClickHouse server. We can find it using `docker inspect container_name`(the container_name is set by command docker run --name xxx), and copy the `Gateway` like this:
 
 ```shell
 $ docker inspect container_name | grep Gateway
@@ -32,17 +32,17 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
 "IPv6Gateway": "",
 ```
 If you use your own data, you can also change `host` field to your own host IP
-4. Use `docker build -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.
+4. Use `docker build --build-arg KER_REL_PATH='./python' --build-arg BASE_IMAGE='continuumio/miniconda3' -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.
 
 > If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`.
 >
 > **Notice:** Pathnames of directories like "pwd" may use `\` to join the directory in some versions of Windows. We recommend using absolute paths.
 
-5. Then we can use `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to create and run the container.
+5. Then we can use `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to create and run the container.
 
 6. Open the link in console log like `http://127.0.0.1:8888/lab?token=xxxxx`.
 
-7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to reload the sorce code.
+7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to reload the sorce code.
 
 8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring!
 Attention: you need to do this work in `notebook` or other parallel folder. If you run in root directory, it can't work because of python import rules.

python/db/clickhouse.py

+1-1
@@ -15,4 +15,4 @@ def query(q):
     return client.execute(q)
 def queryDataframe(q):
     client = getClient()
-    return client.query_dataframe(q)
+    return client.query_dataframe(q, replace_nonwords=False)
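
For context, `replace_nonwords` is an option of clickhouse-driver's `query_dataframe` (which is also why the requirements files in this commit move to `clickhouse-driver>=0.2.8`): by default the driver rewrites non-word characters in result column names to underscores, and passing `replace_nonwords=False` keeps the column names exactly as written in SQL. A rough sketch, assuming a reachable ClickHouse server; the same change is applied to `python_v2/db/clickhouse_wrapper.py` further down:

```python
# Rough sketch (assumes a reachable ClickHouse server; host/user taken from
# the README example above, the query itself is illustrative).
from clickhouse_driver import Client

client = Client(host='172.17.0.1', user='default')
q = "SELECT count() FROM system.tables"

# Default: non-word characters in column names are normalised to underscores,
# so a column like 'count()' comes back with a mangled name.
df_default = client.query_dataframe(q)

# After this commit: original SQL column names are preserved, so notebook
# code can address columns exactly as they appear in the query.
df_raw = client.query_dataframe(q, replace_nonwords=False)

print(df_default.columns.tolist())
print(df_raw.columns.tolist())  # e.g. ['count()']
```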

python/requirements.txt

+9
@@ -0,0 +1,9 @@
+easydict==1.9
+py2neo>=2021.2.3
+plotly==5.9.0
+clickhouse-driver>=0.2.8
+numpy>=1.23.2
+jupyterlab==3.4.5
+matplotlib>=3.5.3
+pandas>=1.4.3
+pyyaml>=6.0

python_v2/README.md

+5-5
@@ -7,7 +7,7 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
 
 2. Enter the repo path `cd open-digger`
 
-3. Go to the `python` folder in the open-digger root directory, create a file named 'local_config.py'(this file has already added into `.gitignore` file.) for Python Kernel with the following contents:
+3. Go to the `python_v2` folder in the open-digger root directory, create a file named 'local_config.py'(this file has already added into `.gitignore` file.) for Python Kernel with the following contents:
 
 ```python
 local_config = {
@@ -22,7 +22,7 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
 }
 }
 ```
-the `host` above is the host of the ClickHouse server. We can find it using `docker inspect containert_name`, and copy the `Gateway` like this:
+the `host` above is the host of the ClickHouse server. We can find it using `docker inspect container_name`(the container_name is set by command docker run --name xxx), and copy the `Gateway` like this:
 
 ```shell
 $ docker inspect container_name | grep Gateway
@@ -32,17 +32,17 @@ Start your ClickHouse container, which should be set up in [Clickhouse-sample-da
 "IPv6Gateway": "",
 ```
 If you use your own data, you can also change `host` field to your own host IP
-4. Use `docker build -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.
+4. Use `docker build --build-arg KER_REL_PATH='./python_v2' --build-arg BASE_IMAGE='continuumio/miniconda3' -t opendigger-jupyter-python:1.0 $(pwd)` to make a docker image, this image is based on `miniconda`. You can check the `Dockerfile` in root directory.
 
 > If you are using **Windows CMD**, all the `$(pwd)` here should be replaced by `%cd%`. And if you are using **Windows Powershell**, all the `$(pwd)` here should be replaced by `${pwd}`.
 >
 > **Notice:** Pathnames of directories like "pwd" may use `\` to join the directory in some versions of Windows. We recommend using absolute paths.
 
-5. Then we can use `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to create and run the container.
+5. Then we can use `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to create and run the container.
 
 6. Open the link in console log like `http://127.0.0.1:8888/lab?token=xxxxx`.
 
-7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -it --name python_notebook_name --rm -p 8888:8888 -v $(pwd):/python_kernel/notebook opendigger-jupyter-python:1.0` to reload the sorce code.
+7. If the source code under `python` folder changed, you need to stop the notebook docker using `docker stop python_notebook_name` and restart the notebook kernel using `docker run -i -t --name python_notebook_name --rm -p 8888:8888 -v "$(pwd):/python_kernel/notebook" opendigger-jupyter-python:1.0` to reload the sorce code.
 
 8. You can find the notebook folder, where we provide demos in the handbook. You can create a new file, and happy data exploring!
 Attention: you need to do this work in `notebook` or other parallel folder. If you run in root directory, it can't work because of python import rules.

python_v2/config.py

+7-7
@@ -34,17 +34,17 @@
 }
 }
 def mergeConfig(base_config, local_config):
-    for key in base_config.keys():
-        if isinstance(base_config[key], dict) and isinstance(local_config[key], dict):
-            mergeConfig(base_config[key], local_config[key])
-        else:
-            base_config[key] = local_config[key]
+    for key, val in local_config.items():
+        if isinstance(val, dict):
+            mergeConfig(base_config[key], val)
+        else:
+            base_config[key] = val
     return base_config
-def getConfig(local_config=None):
-    local_config = local_config or {}
+def getConfig():
     global config
     if not inited:
         try:
+            from local_config import local_config
             config = mergeConfig(config, local_config)
             return config
         except:
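
The rewritten `mergeConfig` iterates over `local_config` instead of `base_config`, so a partial `local_config.py` (like the ones shown in the READMEs above) only overrides the keys it actually defines, and `getConfig` now imports that file directly instead of receiving it as an argument. A small sketch of the merge behaviour, with an illustrative default config rather than the real one from `python_v2/config.py`:

```python
# Sketch of the revised merge: keys present in local_config override the
# defaults, nested dicts are merged recursively, everything else is kept.
def mergeConfig(base_config, local_config):
    for key, val in local_config.items():
        if isinstance(val, dict):
            mergeConfig(base_config[key], val)
        else:
            base_config[key] = val
    return base_config

# Illustrative defaults (not copied from config.py) plus the partial
# local_config from the README example.
default_config = {
    'db': {
        'clickhouse': {'host': 'localhost', 'user': 'default'},
        'neo4j': {'port': '7687'},
    }
}
local_config = {'db': {'clickhouse': {'host': '172.17.0.1'}}}

print(mergeConfig(default_config, local_config))
# -> clickhouse host is overridden, user and neo4j settings keep their defaults
```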

python_v2/db/clickhouse_wrapper.py

+1-1
@@ -21,4 +21,4 @@ def query(self, q):
         return self.client.execute(q)
 
     def queryDataframe(self,q):
-        return self.client.query_dataframe(q)
+        return self.client.query_dataframe(q, replace_nonwords=False)

python_v2/requirements.txt

+4-4
@@ -1,11 +1,11 @@
 DateTime==5.4
-clickhouse-driver==0.2.6
+clickhouse-driver>=0.2.8
 easydict==1.11
 ipynbname==2023.2.0.0
-jupyterlab
+jupyterlab>=3.2.8
 matplotlib>=3.5.3
-numpy>=1.21.5
-pandas==1.4.4
+numpy>=1.23.2
+pandas>=1.4.3
 plotly==5.9.0
 py2neo==2021.2.4
 typing==3.7.4.3

requirements_python.txt

-9
This file was deleted.
