Optimizing search on websites with Tipue Search (tipuesearch.js)
// Tipue Search content index: one object per indexed page, holding the
// page title, a text excerpt, the tags and the URL the result links to.
var tipuesearch = {"pages": [
{"title": "Welcome to JIVE - Guidelines | ACME", "text": "Welcome to the ACME Jive Collaboration platform Please read this document, which contains","tags": "Welcome to JIVE - Guidelines | ACME","url": "http://10.0.82.13/DOC-1001.html"},
{"title": "Datacenter FAQ - ACME ISO Hosting | ACME", "text": "Where are the datacenters located? Do you subcontract activities? We have several datacenters around","tags": "Datacenter FAQ - ACME ISO Hosting | ACME","url": "http://10.0.82.13/DOC-1002.html"},
{"title": "Customer Information template | ACME", "text": "This document will need to become the template that is used to create the information of the custome","tags": "Customer Information template | ACME","url": "http://10.0.82.13/DOC-1003.html"},
{"title": "ISO Customer List | ACME", "text": "Please add customers as content is created. Please ensure you add the name alphabetically. Insert fo","tags": "ISO Customer List | ACME","url": "http://10.0.82.13/DOC-1004.html"},
{"title": "Accenture Phillipines/SiACMEpore - APH - Hosted | ACME", "text": "CustomerAccenture Philippines/SiACMEporealso known asAPHISO Project CodeAPHContract Start date01 June","tags": "Accenture Phillipines/SiACMEpore - APH - Hosted | ACME","url": "http://10.0.82.13/DOC-1006.html"},
{"title": "KB: SAP logon screen hangs - Oracle | ACME", "text": "KeywordsORA 257 00257 ORA-00257 Archivelog system hangs logon login screenSymptomSAP logon screen ap","tags": "KB: SAP logon screen hangs - Oracle | ACME","url": "http://10.0.82.13/DOC-1007.html"},
]};
Creating the index database using Python
import urllib2.request
import glob, os
import codecs
import sys
import string
from bs4 import BeautifulSoup
#os.chdir("D:\MergedCopies_17042015\jive.ACME.com\docs\DOC-1008.html")
def _page_title(doc, fallback):
    """Return the document's <title> text, or *fallback* when it has none."""
    title_tag = doc.title
    if title_tag is not None and title_tag.string:
        return title_tag.string
    return fallback


def _first_text(doc, selector):
    """Return the text of the first element matching *selector*, or ''."""
    matches = doc.select(selector)
    return matches[0].get_text() if matches else ''


def _format_entry(title, text, tags, url):
    """Serialize one page as a single line of the ``pages`` array."""
    import json  # local import: only this helper needs it

    def quoted(value):
        # json.dumps escapes backslashes and control characters, which
        # would otherwise produce an unparsable string literal.
        return json.dumps(value, ensure_ascii=False)

    # Double quotes are dropped from every field and newlines from the
    # text, as the hand-built version did, so existing output is unchanged.
    return ('{"title": ' + quoted(title.replace('"', '').strip())
            + ', "text": '
            + quoted(text.replace('"', '').replace('\n', '').strip())
            + ',"tags": ' + quoted(tags.replace('"', '').strip())
            + ',"url": ' + quoted(url) + '},\n')


def paquillo(source_dir=r"D:\MergedCopies_17042015\jive.ACME.com\docs",
             output_path=r'D:\Documentos\index.json',
             base_url='http://10.0.82.13/',
             max_lenth=100,
             tag_start=11):
    """Write one search-index entry per ``DOC-NNNN.html`` file.

    Every file in *source_dir* whose name matches ``DOC-`` followed by four
    digits and ``.html`` is parsed, and a line with its title, a text
    excerpt, its tags and its URL is written to *output_path*. The working
    directory is left unchanged.

    Args:
        source_dir: Directory holding the exported HTML documents.
        output_path: File that receives the entries (overwritten).
        base_url: Prefix put in front of the file name to build the URL.
        max_lenth: Maximum number of characters kept from the page body.
        tag_start: Number of trailing characters cut from the folder label
            to obtain the tags.

    Returns:
        0 once all matching files have been written.
    """
    pattern = os.path.join(source_dir, "DOC-[0-9][0-9][0-9][0-9].html")
    with codecs.open(output_path, 'w', encoding='utf-8') as output_file:
        # Sorted so the entry order does not depend on directory order.
        for path in sorted(glob.glob(pattern)):
            file_name = os.path.basename(path)
            print('Document: ' + file_name)
            with codecs.open(path, encoding='utf-8') as f:
                # NOTE(review): no parser is named, so bs4 picks whichever
                # one is installed; pin one if output must be reproducible.
                doc = BeautifulSoup(f.read())

            title = _page_title(doc, file_name)

            # An absent or empty body falls back to the title.
            body = _first_text(doc, '.jive-rendered-content')
            content_text = body[:max_lenth] or title

            # The tags come from the folder label text minus its last
            # tag_start characters; labels that short use the title.
            folder = _first_text(doc, '.jive-icon-med .jive-icon-folder')
            if len(folder) > tag_start:
                tag_text = folder[:len(folder) - tag_start]
            else:
                tag_text = title

            output_file.write(
                _format_entry(title, content_text, tag_text,
                              base_url + file_name))
    return 0
def main():
    """Script entry point: build the search index by calling paquillo()."""
    paquillo()
# Run main() only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()