December 22, 2010

dtSearch : Step 4 search with the sitecore API

See also:
With the sitecore API

Here is an example of a class that executes searches with dtSearch.
You can download it here


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.UI;

using Sitecore;
using Sitecore.Configuration;
using Sitecore.Diagnostics;
using Sitecore.Modules.dtSearch;
using Sitecore.Modules.dtSearch.Core;
using dtSearch.Engine;
using Sanitec;
using System.Xml.Linq;
using System.Text.RegularExpressions;
using Sitecore.Data.Items;
using System.IO;

namespace QuickSearch
{
    /// <summary>
    /// Equality comparer that can be used to avoid duplicate records
    /// (for example with <c>Distinct</c>). Two results are duplicates when they
    /// have the same <c>Url</c>, or when they share the same non-empty <c>ScProduct</c>.
    /// </summary>
    public class SearchResultContentComparer : IEqualityComparer<SearchResultContent>
    {
        #region IEqualityComparer<SearchResultContent> Members
        /// <summary>
        /// Tells whether two results are duplicates of each other.
        /// </summary>
        /// <param name="x">First result (may be null)</param>
        /// <param name="y">Second result (may be null)</param>
        /// <returns>
        /// true when both are the same reference (or both null), when the urls are
        /// equal, or when both carry the same non-empty product; false otherwise
        /// </returns>
        public bool Equals(SearchResultContent x, SearchResultContent y)
        {
            if (Object.ReferenceEquals(x, y))
            {
                return true;
            }

            if (Object.ReferenceEquals(x, null) || Object.ReferenceEquals(y, null))
            {
                return false;
            }

            return x.Url == y.Url
                || (!String.IsNullOrEmpty(x.ScProduct) && x.ScProduct == y.ScProduct);
        }

        /// <summary>
        /// Returns the same hash code for every result.
        /// </summary>
        /// <param name="obj">The result to hash (ignored)</param>
        /// <returns>Always 0</returns>
        public int GetHashCode(SearchResultContent obj)
        {
            // Equals is an "url OR product" relation: two results with different
            // urls can still be equal through their product. Hashing the url alone
            // would give such results different hash codes, so hash-based callers
            // would never invoke Equals for them and the duplicates would survive.
            // A constant is the only value guaranteed to be consistent with Equals;
            // the cost is a linear comparison, which is acceptable for result lists.
            return 0;
        }
        #endregion
    }

    /// <summary>
    /// One search hit: the values read from a result element plus a lookup of
    /// the Sitecore item the hit refers to.
    /// </summary>
    public class SearchResultContent
    {
        // Matches media handler urls and captures the media path (group 1).
        // Built once instead of on every property read; the dot before "ashx"
        // is escaped so that it only matches a literal dot.
        private static readonly Regex MediaUrlPattern = new Regex(@"^.*?/~/media/(.*?)\.ashx");

        // Holds the item resolved by the last read of SitecoreItem. It is not used
        // as a cache: Url and ScID are settable, so the lookup is redone each time.
        private Item sitecoreItem = null;

        /// <summary>
        /// The corresponding sitecore item.
        /// When <see cref="Url"/> is a media url the item is looked up under
        /// "/sitecore/media library/"; otherwise it is looked up by <see cref="ScID"/>.
        /// Returns null when neither a media url nor an id is available.
        /// </summary>
        public Item SitecoreItem
        {
            get
            {
                //Handle the MediaItem item documents
                // A null/empty url cannot be a media url (and Regex.Match rejects null).
                Match match = String.IsNullOrEmpty(Url) ? Match.Empty : MediaUrlPattern.Match(Url);

                if (match.Success)
                {
                    string path = "/sitecore/media library/" + match.Groups[1].Value;
                    sitecoreItem = Sitecore.Context.Database.SelectSingleItem(path);
                }
                else if (!String.IsNullOrEmpty(ScID))
                {
                    sitecoreItem = Sitecore.Context.Database.GetItem(ScID);
                }
                else
                {
                    // No id was indexed for this hit, so there is nothing to look up.
                    sitecoreItem = null;
                }

                return sitecoreItem;
            }
        }

        /// <summary>Value of the "scID" element of the hit</summary>
        public string ScID { get; set; }
        /// <summary>Value of the "title" element of the hit</summary>
        public string Title { get; set; }
        /// <summary>Value of the "url" element of the hit</summary>
        public string Url { get; set; }
        /// <summary>Value of the "synopsis" element of the hit</summary>
        public string Synopsis { get; set; }
        /// <summary>Value of the "score" attribute of the hit</summary>
        public string Score { get; set; }
        /// <summary>Value of the "scProduct" element of the hit</summary>
        public string ScProduct { get; set; }
        /// <summary>Value of the "scLang" element of the hit</summary>
        public string ScLang { get; set; }

    }

    /// <summary>
    /// Runs a dtSearch query against the index configured for one site and
    /// converts the xml results into <see cref="SearchResultContent"/> objects.
    /// </summary>
    public class QuickSearchContent
    {
        /// <summary>
        /// Site name used as suffix of the setting names read by this class.
        /// </summary>
        public string Sitename { get; set; }

        /// <summary>
        /// Reads the setting "dtSearchIndexName_[sitename]".
        /// </summary>
        private string nameIndexDtSearch()
        {
            string dtSearchIndex = "dtSearchIndexName_" + Sitename;
            return Sitecore.Configuration.Settings.GetSetting(dtSearchIndex);
        }

        /// <summary>
        /// Reads the setting "dtSearchIndexFolder_[sitename]".
        /// </summary>
        private string pathIndexDtSearch()
        {
            string dtSearchIndexFolder = "dtSearchIndexFolder_" + Sitename;
            return Sitecore.Configuration.Settings.GetSetting(dtSearchIndexFolder);
        }

        /// <summary>
        /// The sitename must correspond to the settings dtSearch.config
        ///  dtSearchIndexFolder_[sitename]
        ///  dtSearchIndexName_[sitename]
        /// </summary>
        /// <param name="sitename">The sitename must correspond to the settings dtSearch.config</param>
        public QuickSearchContent(string sitename)
        {
            Sitename = sitename;
        }

        /// <summary>
        /// Return the results
        /// </summary>
        /// <param name="query">The words to search for</param>
        /// <returns>
        /// The results; an empty list when the query is null/empty or when the
        /// engine returns no result document. Values missing from a hit are
        /// returned as empty strings.
        /// </returns>
        public List<SearchResultContent> LoadResults(string query)
        {
            List<SearchResultContent> results = new List<SearchResultContent>();

            if (string.IsNullOrEmpty(query))
            {
                return results;
            }

            XmlDocument xmlResults = GetXmlResults(query);
            if (xmlResults == null || xmlResults.DocumentElement == null)
            {
                // Nothing to parse; XDocument.Load would throw on a document without root.
                return results;
            }

            XDocument searchResults;
            using (XmlNodeReader reader = new XmlNodeReader(xmlResults))
            {
                // XDocument.Load reads the whole document, so the reader can be
                // released as soon as the call returns.
                searchResults = XDocument.Load(reader);
            }

            return searchResults.Descendants("item")
                .Select(node => new SearchResultContent
                {
                    ScID = ElementValue(node, "scID"),
                    ScProduct = ElementValue(node, "scProduct"),
                    ScLang = ElementValue(node, "scLang"),
                    Score = (string)node.Attribute("score") ?? String.Empty,
                    Title = ElementValue(node, "title"),
                    Url = ElementValue(node, "url"),
                    Synopsis = ElementValue(node, "synopsis")
                })
                .ToList();
        }

        /// <summary>
        /// Reads the text of a child element, treating a missing element as empty
        /// so that one incomplete hit does not fail the whole search.
        /// </summary>
        /// <param name="parent">The element that holds the child</param>
        /// <param name="name">Name of the child element</param>
        /// <returns>The child's value, or an empty string when it does not exist</returns>
        private static string ElementValue(XElement parent, string name)
        {
            return (string)parent.Element(name) ?? String.Empty;
        }

        /// <summary>
        /// Initialize DTsearch, run the query and return the raw results
        /// </summary>
        /// <param name="query">The words to search for</param>
        /// <returns>An XmlDocument with the results</returns>
        private XmlDocument GetXmlResults(string query)
        {
            ISearch engine = dtSearchFactory.CreateSearchEngine();
            Assert.IsNotNull(engine, "Cannot create the dtsearch engine the 'dtSearchIndexName' key in the web.config may be incorrect");

            // Get the dtSearch engine implementation.
            // Kept in a local: the engine is only needed for the duration of this call.
            dtSearchEngine searchEngine = engine as dtSearchEngine;
            Assert.IsNotNull(searchEngine, "Cannot cast the engine to dtSearchEngine");

            // Set index.
            // The folder and the name are concatenated as-is, so the folder setting
            // has to end with a path separator.
            string pathIndex = pathIndexDtSearch() + nameIndexDtSearch() + "\\";
            Assert.IsTrue(System.IO.Directory.Exists(pathIndex), "Could not find the directory: " + pathIndex);
            searchEngine.Index = pathIndex;

            // Execute search
            searchEngine.Search(query, Sitecore.Modules.dtSearch.Core.Constants.Options.WildCards | Sitecore.Modules.dtSearch.Core.Constants.Options.AllWords);

            // Get search results as XML
            return engine.SearchResults;
        }
    }
}

Usage of this class:

// Create a searcher for the site "YourSite", then run a query with it.
QuickSearchContent searcher = new QuickSearchContent("YourSite");
List<SearchResultContent> results = searcher.LoadResults("Your Query");

December 16, 2010

dtSearch : Step 3 install and configure

See also:
Install

First of all you need to download the Sitecore package and install it. Normally, there are no problems with this step.
After that, you need to install the dtSearch Windows application (a light version provided by Sitecore named dtIndexer).
If you have trouble starting dtIndexer because a DLL is missing or for any other reason, the workaround is:
  1. uninstall the light sitecore version 
  2. install the full evaluation version of dtSearch
  3. launch it
  4. uninstall this full version
  5. reinstall the sitecore version
Add your site

  1.  Launch dtIndexer
  2. Click on 'Create Index (advanced)...'
  3. Configure as below
  4. You may add *.ashx in exclude filter to exclude the handlers
  5. Click on 'Add web...'

  6. Add your website URL and choose how many levels of hyperlinks dtSearch needs to follow
  7. Click on 'Start indexing'
That's all for the configuration of dtSearch itself.

Modify dtSearch.config
To add an index to your web app, you need to modify the file: \App_Config\Include\dtSearch.config
<settings>
 ...

 <!-- Your index -->
 <setting name="dtSearchIndexFolder" value="C:\Program Files (x86)\Sitecore\dtSearch\search\" />
 <setting name="dtSearchIndexName" value="nameOfYourIndex" />

 ...
</settings> 

Adding some custom fields
If you need to index some custom fields, you need to add them to fields.xml.
This file is in the \bin folder of dtSearch (by default something like this: C:\Program Files (x86)\Sitecore\dtSearch\bin\fields.xml)
 Here is an example with 2 custom fields: scProduct and scLang:
<?xml version="1.0" encoding="UTF-8" ?>
<dtSearchTextFieldsDefinitions>
 <Item>
  <Name>scID</Name>
  <Start></Start>
  <End></End>
  <Filters></Filters>
  <LinesToScan>0</LinesToScan>
  <Flags>8</Flags>
 </Item>
 <Item>
  <Name>scPath</Name>
  <Start></Start>
  <End></End>
  <Filters></Filters>
  <LinesToScan>0</LinesToScan>
  <Flags>8</Flags>
 </Item>
 <Item> 
  <Name>scProduct</Name> 
  <Start></Start> 
  <End></End> 
  <Filters></Filters> 
  <LinesToScan>0</LinesToScan> 
  <Flags>8</Flags> 
 </Item> 
 <Item> 
  <Name>scLang</Name> 
  <Start></Start> 
  <End></End> 
  <Filters></Filters> 
  <LinesToScan>0</LinesToScan> 
  <Flags>8</Flags> 
 </Item>
</dtSearchTextFieldsDefinitions> 

It seems that you can also add some custom fields for only one site using the box 'Fields to display in search results', but I have never tried this method.

December 9, 2010

dtSearch : Step 2 index your pages

See also:

The trivial way to index your pages with dtSearch is to start from the homepage and then let dtSearch browse X levels of pages. dtSearch will follow the HTML links in your pages.

But the problems begin when you use any LinkButton, paging, button, … because these are not plain HTML links but postbacks using JavaScript (like: javascript:__doPostBack('body_1$rightzone_0$lbSearch','')). Of course you may try to adapt your code to use only HTML links, but it is complicated and it does not allow you to use and index AJAX requests.

The easiest way to index all your pages is to create a sitemap for dtSearch with all your pages, including (if possible) all the possibilities for the wildcard items, and use this URL to index your site on 1 level.

If you need to deal with a splash page shown when a cookie is not set, for example, you will probably need to detect dtSearch in order to handle it differently. To do that you have 2 possibilities:

You may use HttpContext.Current.Request.UserAgent and compare to the dtsearch useragent



Or you may also detect whether the request comes from a crawler, but you need to define dtSearch as a crawler.

To configure that you need to add this line in your web.config just before </system.web>:
<browserCaps configSource="App_Config\BrowserCaps.config" />

You need a recent BrowserCaps.config (find it on google or download it here) and add this in it (already included in my file):
<case match="dtSearch*">
    browser=dtsearch
    crawler=true
    Unknown=false
    type=%{browser}
</case>

After this config you may detect the crawler (including dtsearch) using this simple code:
if (Request.Browser.Crawler)
 ...

dtSearch : Step 1 adapt your aspx

See also:

This is the first post of a series on using dtSearch for Sitecore step by step.

The first thing you need to do when you use dtSearch is to adapt your aspx/ascx/html code.

Normally, you will adapt the Master page to include some special fields or values. To do that you need to include some tags like these in the <head> of your html:

<%-- Each meta tag exposes one value to the indexer under the field given by its name attribute. --%>
<meta id="scID" content="<%# Sitecore.Context.Item.ID%>" name="scID" />
<meta id="Title" content="<%# Sitecore.Context.Item["Title"]%>" name="Title" />
<meta id="scPath" content="<%# Sitecore.Context.Item.Paths.Path%>" name="scPath" />
<meta id="scLang" content="<%# Sitecore.Context.Language.ToString() %>" name="scLang" />


I think that it is a best practice to include these 4 tags (unless you are sure you will not use the multilingual functionalities).

If you don’t specify anything else, the whole page will be indexed, but most of the time you don’t need to index the menus, the JavaScript, …

You may exclude some part of the code using these tags:
<!--BeginNoIndex-->
...
<!--EndNoIndex-->
All the code between these 2 tags will be ignored by dtSearch.

That's all for the general pages.

December 1, 2010

Don't use HostName

I discovered that it is better to use Sitecore.Context.Site.TargetHostName rather than Sitecore.Context.Site.HostName, because the HostName property only works as long as you have a single hostname per site in the web.config.

TargetHostName returns the targetHostName property of the site if it is specified in the site tag of the web.config, or the first hostname if this property is not specified.