-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmake_LIWC_dictionary.py
106 lines (84 loc) · 1.99 KB
/
make_LIWC_dictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import yaml
import csv
import re
def remove_null(alist):
    """Return the non-null entries of ``alist`` as a new list, in order.

    An entry is treated as null when ``pd.isna`` reports it as missing
    (for example ``None`` or ``NaN``). Falsy but present values such as
    ``0`` or ``""`` are kept.

    Args:
        alist: Iterable of scalar values, e.g. a list or a pandas Series.

    Returns:
        A new list holding the entries that are not missing; ``alist``
        itself is not modified.
    """
    return [val for val in alist if not pd.isna(val)]
def make_not_columns(nflevels, levels):
    """Return the zero-based indices of the levels that are not selected.

    Levels are numbered ``1..nflevels``. Every number in that range that is
    absent from ``levels`` is reported as a zero-based index (number - 1),
    i.e. the rows that are to be dropped.

    Args:
        nflevels: Total number of levels.
        levels: Iterable of 1-based level numbers to keep.

    Returns:
        Ascending list of zero-based indices that are not in ``levels``.
    """
    # Materialise once: membership becomes O(1) and one-shot iterables
    # (generators, iterators) are not exhausted by the first lookup.
    kept = set(levels)
    return [level - 1 for level in range(1, nflevels + 1) if level not in kept]
def _add_link(links, word, num):
    """Record that ``word`` belongs to category ``num``, skipping duplicates."""
    categories = links.setdefault(word, [])
    if num not in categories:
        categories.append(num)


def make_dictionary(levelType, filename="variables.yml", file_to_read="variables.csv"):
    """Build the ``LIWC.dic`` file from a column-oriented CSV.

    The CSV is read with pandas and transposed, so each CSV column becomes
    one row; missing cells are removed from every row. The first three
    remaining entries of a row are its level-1, level-2 and level-3 headers.
    ``levelType`` selects which one is used as the category name; the rest
    of the row is the payload.

    Rows are consumed in pairs: the first row of a pair holds the category
    name followed by its words, the second holds a per-word case flag at the
    same positions. A word flagged ``'nc'`` is additionally registered with
    the case of its first letter swapped.

    The output written to ``LIWC.dic`` in the current directory is::

        %
        <category number>\t<category name>
        ...
        %
        <word>\t<category number>[\t<category number>...]
        ...

    Args:
        levelType: ``1`` or ``2`` to use the first or second header entry as
            the category name; any other value keeps the third entry.
        filename: Not used by this function; kept for compatibility.
        file_to_read: Path of the CSV file to read.

    Raises:
        IndexError: If the number of rows is odd, if a row has fewer than
            three non-null entries while ``levelType`` is 1 or 2, or if a
            flag row is shorter than its word row.
    """
    table = pd.read_csv(file_to_read).T
    nfrows = table.shape[0]

    # Preprocessing: drop missing cells, then reduce every row to
    # [chosen header, payload...].
    rows = [remove_null(table.iloc[idx]) for idx in range(nfrows)]
    for idx, row in enumerate(rows):
        if levelType == 1:
            row[2] = row[0]
        elif levelType == 2:
            row[2] = row[1]
        rows[idx] = row[2:]

    headers = {}  # category name -> 1-based category number, in first-seen order
    links = {}    # word -> list of category numbers it belongs to
    for idx in range(0, nfrows, 2):
        array = rows[idx]
        # Nulls are stripped per row, so the flag row only lines up with the
        # word row when both have the same number of non-null cells.
        casetype = rows[idx + 1]
        if len(array) <= 1:
            continue  # header only, no words to register
        num = headers.setdefault(array[0], len(headers) + 1)
        for pos in range(1, len(array)):
            word = array[pos]
            case = casetype[pos]
            _add_link(links, word, num)
            if case == 'nc':
                first = word[0]
                if 'A' <= first <= 'Z':
                    first = first.lower()
                else:
                    first = first.upper()
                _add_link(links, first + word[1:], num)

    # Assemble the whole file in memory first so nothing is written (and no
    # file is created or truncated) if the data turns out to be malformed.
    parts = ["%\n"]
    for name, num in headers.items():
        parts.append(str(num) + '\t' + name + '\n')
    parts.append('%\n')
    for word, nums in links.items():
        parts.append(word + ''.join('\t' + str(num) for num in nums) + '\n')
    content = ''.join(parts)

    # Context manager guarantees the handle is closed even if write() fails.
    with open('LIWC.dic', 'w') as out:
        out.write(content)
# Script entry point: ask which header level (1, 2 or 3) should be used as
# the category name, then build LIWC.dic from the default CSV file.
# These statements sit at module level without a __main__ guard, so they also
# run (and prompt) when this file is imported.
# int() raises ValueError if the reply is not an integer.
n = int(input("what level do you wanna make the header [1/2/3] = "))
make_dictionary(n)