Difference between revisions of "IAnnSolrMigration"

From Protein Prediction 2 Winter Semester 2014
Line 1: Line 1:
  +
==='''Status'''===
  +
------
  +
  +
  +
==='''Introduction'''===
  +
------
  +
  +
  +
  +
==='''Project goals'''===
  +
------
  +
  +
  +
 
==='''Proposal for Schema changes'''===
 
==='''Proposal for Schema changes'''===
 
------
 
------

Revision as of 16:52, 1 January 2015

Status



Introduction



Project goals



Proposal for Schema changes


The current version of the iAnn website delivers its service using a custom schema for naming data attributes. However, this schema is not standardized and does not follow a default field-naming scheme. Therefore, the community agreed on a default schema layout, named SASI. In this document we point out the differences and describe the new schema fields. Because the current iAnn collection runs on Apache Solr 4.1, and the latest release, which includes some major bug fixes, is 4.10.2, we decided to migrate and update our service as well.

Accomplished tasks:

  • Understanding the existing schema,
  • Proposing structural changes, and
  • Highlighting the differences


Old Schema:

  <field name="id" type="string" indexed="true" stored="true" required="true" />
  <field name="title" type="text" indexed="true" stored="true"/>
  <field name="subtitle" type="text" indexed="true" stored="true"/>
  <field name="description" type="string" indexed="false" stored="true"/>
  <field name="provider" type="text_lowercase" indexed="true" stored="true"/>
  <field name="link" type="string" indexed="false" stored="true"/>
  <field name="start" type="tdate" indexed="true" stored="true"/>
  <field name="end" type="tdate" indexed="true" stored="true"/>
  <field name="venue" type="text_lowercase" indexed="true" stored="true"/> 
  <field name="city" type="text_lowercase" indexed="true" stored="true"/>
  <field name="county" type="string" indexed="false" stored="true"/>
  <field name="country" type="text_lowercase" indexed="true" stored="true"/>
  <field name="postcode" type="string" indexed="false" stored="true"/>
  <field name="attachment" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="image" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="keyword" type="text_lowercase" indexed="true" stored="true" multiValued="true"/>
  <field name="category" type="text_lowercase" indexed="true" stored="true" multiValued="true"/>
  <field name="field" type="text_lowercase" indexed="true" stored="true" multiValued="true"/> 
  <field name="submission_name" type="text_lowercase" indexed="true" stored="true" multiValued="true"/>
  <field name="submission_email" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="submission_date" type="tdate" indexed="true" stored="true" multiValued="true"/>
  <field name="submission_comment" type="text" indexed="false" stored="true" multiValued="true"/>
  <field name="submission_organization" type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="latitude" type="double" indexed="true" stored="true"/>
  <field name="longitude" type="double" indexed="true" stored="true"/>
  <field name="sponsor" type="text_lowercase" indexed="true" stored="true"/>
  <field name="public" type="boolean" indexed="true" stored="true"/>
  <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
  <field name="_version_" type="long" indexed="true" stored="true"/>


New Schema:

  <field name="id" type="text" indexed="true" stored="true" required="true" />
  <field name="title" type="text" indexed="true" stored="true"/>
  <field name="subtitle" type="text" indexed="true" stored="true"/>
  <field name="description" type="text" indexed="false" stored="true"/>
  <field name="prerequisites" type="text" indexed="false" stored="true"/>
  <field name="programme" type="programme" indexed="false" stored="true"/>
  <field name="comments" type="text" indexed="false" stored="true"/>
  <field name="fees" type="fees" indexed="false" stored="true" multiValued="true"/>
  <field name="discount" type="discount" indexed="false" stored="true"/>
  <field name="accreditation" type="text" indexed="false" stored="true"/>
  <field name="status" type="text" indexed="false" stored="true"/>
  <field name="eligibility" type="text" indexed="false" stored="true" multiValued="true"/>
  <field name="capacity" type="int" indexed="false" stored="true"/>
  <field name="contact" type="person" indexed="false" stored="true" multiValued="true"/>
  <field name="submitter" type="person" indexed="false" stored="true" multiValued="true"/>
  <field name="organizers" type="person" indexed="false" stored="true" multiValued="true"/>
  <field name="speakers" type="person" indexed="false" stored="true" multiValued="true"/>
  <field name="host_institution" type="organization" indexed="false" stored="true" multiValued="true"/>
  <field name="sponsor" type="organization" indexed="false" stored="true" multiValued="true"/>
  <field name="venue" type="text" indexed="false" stored="true"/>
  <field name="street_address" type="text" indexed="false" stored="true"/>
  <field name="city" type="text" indexed="false" stored="true"/>
  <field name="province" type="text" indexed="false" stored="true"/>
  <field name="country" type="text" indexed="false" stored="true"/>
  <field name="postcode" type="text" indexed="false" stored="true"/>
  <field name="post_office_box" type="text" indexed="false" stored="true"/>
  <field name="url" type="link" indexed="false" stored="true"/>
  <field name="attachment" type="link" indexed="false" stored="true" multiValued="true"/>
  <field name="social_media" type="link" indexed="false" stored="true" multiValued="true"/>
  <field name="starts" type="date" indexed="false" stored="true"/>
  <field name="ends" type="date" indexed="false" stored="true"/>
  <field name="time_zone" type="text" indexed="false" stored="true"/>
  <field name="last_update" type="date" indexed="false" stored="true" multiValued="true"/>
  <field name="deadlines" type="date" indexed="false" stored="true" multiValued="true"/>
  <field name="registration_opens_date" type="date" indexed="false" stored="true" multiValued="true"/>
  <field name="acceptance_notification_date" type="date" indexed="false" stored="true" multiValued="true"/>
  <field name="type" type="text" indexed="false" stored="true" multiValued="true"/>
  <field name="topic" type="text" indexed="false" stored="true" multiValued="true"/>
  <field name="public" type="boolean" indexed="false" stored="true"/>
  <field name="target_audience" type="text" indexed="false" stored="true" multiValued="true"/>
  <field name="spotlight" type="boolean" indexed="false" stored="true"/>
  <field name="latitude" type="double" indexed="true" stored="true"/>
  <field name="longitude" type="double" indexed="true" stored="true"/>
  <field name="_version_" type="long" indexed="true" stored="true"/>


Conclusion:

With the newly proposed schema, iAnn will be able to serve even more diverse data. This will serve as a basis for new services on top of the Solr search engine. In addition, migrating to a newer version of Solr will make the service safer.

GitHub Repository

GitHub : iANN_Solr