<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>(un)structured</title>
	<atom:link href="http://unstructuredthoughts.wordpress.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://unstructuredthoughts.wordpress.com</link>
	<description>Josh Payne on content analytics, enterprise content and information management</description>
	<lastBuildDate>Tue, 29 Jun 2010 17:40:36 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='unstructuredthoughts.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>(un)structured</title>
		<link>http://unstructuredthoughts.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://unstructuredthoughts.wordpress.com/osd.xml" title="(un)structured" />
	<atom:link rel='hub' href='http://unstructuredthoughts.wordpress.com/?pushpress=hub'/>
		<item>
		<title>This blog has moved!</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/06/29/this-blog-has-moved/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/06/29/this-blog-has-moved/#comments</comments>
		<pubDate>Tue, 29 Jun 2010 17:40:36 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=128</guid>
		<description><![CDATA[This blog has moved to http://www.unstructuredthoughts.com.
Last one there is a rotten egg!<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=128&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Once again, I&#8217;ve moved my blogging activities.</p>
<p>I&#8217;ve moved off of the wordpress.com platform to using wordpress on a web host.</p>
<p>Check it out at <a href="http://www.unstructuredthoughts.com">http://www.unstructuredthoughts.com</a></p>
<p>And update your feed readers as appropriate with the <a href="http://feeds.feedburner.com/UnstructuredThoughts">new RSS feed</a> you find there!</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/128/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/128/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/128/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/128/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/128/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/128/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/128/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/128/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=128&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/06/29/this-blog-has-moved/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>The Rise of Content Analytics: A Valedictory</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/06/16/the-rise-of-content-analytics-a-valedictory/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/06/16/the-rise-of-content-analytics-a-valedictory/#comments</comments>
		<pubDate>Wed, 16 Jun 2010 18:53:53 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Analytics]]></category>
		<category><![CDATA[Content Classification]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=125</guid>
		<description><![CDATA[As my colleagues inside IBM have known for over a week, I&#8217;ve decided to leave IBM to pursue other professional opportunities. Before I put out some blog posts on my future, I wanted to use this opportunity to look backwards at how far the ECM business has come with respect to discovery and content [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=125&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>As my colleagues inside IBM have known for over a week, I&#8217;ve decided to leave IBM to pursue other professional opportunities. Before I put out some blog posts on my future, I wanted to use this opportunity to look backwards at how far the ECM business has come with respect to discovery and content analytics over the last 5 years.</p>
<p>I came to IBM as part of the acquisition of a small enterprise search vendor &#8212; iPhrase Technologies. We joined up with a product team inside IBM building a product called &#8220;Information Integrator OmniFind Edition&#8221; to attack the enterprise search market.  Though we were grouped inside the Content Management organization, we really went about our business independently relative to our ECM brethren, focusing on the search solutions, leveraging content analytics technologies for &#8216;concept searching&#8217;.</p>
<p>1 year later, FileNet joined IBM and we began to try to apply our search and discovery technologies to ECM centric business scenarios. As we began to collaborate, one of the first things that struck me about ECM, was the treatment of the documents. In enterprise search, documents were something to be cracked open by definition &#8212; how else to search it?</p>
<p>Yet the ECM world had a tendency to treat a document as an &#8216;object&#8217; &#8211; objects to be handled and managed. It struck me as digital paper shuffling where the expectation was that ECM was for readying the document for someone with 2 eyes to read it and use it (and don&#8217;t get me wrong, it was challenging paper shuffling &#8212; billions of objects, large scale scanning &#8212; tough, tough problems).</p>
<p>Within this context we set down a path of applying analytics technologies to ECM. Our first step was to weave IBM&#8217;s content classification product within the ECM architecture, applying it to compelling scenarios in email archiving and records management. Next, we brought to market an eDiscovery solution built with analytics at its core. These first two steps were exciting but focused attempts at bringing about a better solution to specific ECM problems with content analytics, especially in the information governance market.</p>
<p>Then last year, IBM made our Content Analytics platform generally available. This third step is especially gratifying. Content analytics technologies have moved from being an isolated technology, separate from ECM, to delivering insight about businesses by leveraging the text inside of documents &#8212; the insides of these objects.</p>
<p>The embrace and adoption of content analytics is especially gratifying for me personally. Though I had but a small role, the change inside IBM ECM and externally amongst customers, analysts and others is stark relative to when I joined IBM.  Content is no longer simply an &#8216;object&#8217; to be managed &#8212; it&#8217;s an asset to be leveraged and this is a striking difference. I am confident that in the coming months and years this will increasingly become the accepted attitude and approach in ECM.</p>
<p>On that note, I want to thank folks for reading this blog on the topics of content classification and content analytics. For folks who are interested in more writing on information lifecycle governance, <a href="http://craigrhinehart.wordpress.com/">Craig Rhinehart continues to write on this topic at his blog.</a></p>
<p>Since my professional life will take me away from content analytics in the near term, I expect that this blog will start to reflect the new paths I&#8217;ll be following on my professional, post-IBM journey.</p>
<p>I hope you&#8217;ll continue to read as my journey takes these exciting new steps.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/125/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=125&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/06/16/the-rise-of-content-analytics-a-valedictory/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>Throwing disk at the problem isn&#8217;t the long term solution</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/05/10/throwing-disk-at-the-problem-isnt-the-long-term-solution/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/05/10/throwing-disk-at-the-problem-isnt-the-long-term-solution/#comments</comments>
		<pubDate>Mon, 10 May 2010 18:21:03 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Assessment]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=120</guid>
		<description><![CDATA[Last week I gave 8 talks on the topic of content analytics over the course of 2 regional marketing events in Washington DC and Atlanta. Having given that many talks on related topics so frequently in such a short time period, I found myself locking in on a few key statistics and facts, and I [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=120&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Last week I gave 8 talks on the topic of content analytics over the course of 2 <a href="https://www-950.ibm.com/events/wwe/ecm/ecmruns10.nsf">regional marketing events</a> in Washington DC and Atlanta. Having given that many talks on related topics so frequently in such a short time period, I found myself locking in on a few key statistics and facts, and I was reminded of that fact as I read <a href="http://craigrhinehart.wordpress.com/2010/05/08/spring-cleaning-for-information/">Craig Rhinehart&#8217;s most recent missive on his blog</a>.  In my talks last week I similarly made the point that the &#8220;save everything&#8221; ethos described by Craig is losing steam. Why? The cost of storage isn&#8217;t dropping as quickly as the information is being generated. Organizations are coming to the realization that it&#8217;s simply not cost effective to &#8216;throw storage&#8217; at the problem. The statistic I found myself using repeatedly last week was cited in <a href="http://blogs.forrester.com/matthew_brown/10-03-31-search_information_governance_enterprise">a recent Forrester blog posting</a>:</p>
<blockquote><p>It&#8217;s no surprise that Forrester clients report their storage capacity requirements are growing 20% to 40% each year. <a href="http://www.forrester.com/rb/Research/controlling_storage_cost_amid_high_growth/q/id/56071/t/2">Storage costs</a> have grown to 17% of the IT hardware budget, up from 10% in 2007.</p></blockquote>
<p>That jump from 10% to 17% is what I found myself repeating last week. Cost per GB is going down every year. But organizations keep on spending more and more of their budget on keeping stuff. Throwing more storage at the problem (and avoiding the cause) has simply led to increased costs across the board.  Not the hallmark of an effective, long-term solution.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/120/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/120/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/120/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=120&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/05/10/throwing-disk-at-the-problem-isnt-the-long-term-solution/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>You Need Content Analytics to Determine the Value of Content</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/05/07/you-need-content-analytics-to-determine-the-value-of-content/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/05/07/you-need-content-analytics-to-determine-the-value-of-content/#comments</comments>
		<pubDate>Fri, 07 May 2010 12:48:04 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Analytics]]></category>
		<category><![CDATA[Content Assessment]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=118</guid>
		<description><![CDATA[I went on vacation last week.* (side note &#8212; though I&#8217;ve embraced twitter, foursquare and other modern public media platforms, I&#8217;ve yet to embrace the idea of broadcasting to the world the fact that my house was completely empty and I was 1000 miles away to the world at large &#8211; call me old fashioned [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=118&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I went on vacation last week.* (side note &#8212; though I&#8217;ve embraced twitter, foursquare and other modern public media platforms, I&#8217;ve yet to embrace the idea of broadcasting to the world at large the fact that my house was completely empty and I was 1000 miles away &#8211; call me old fashioned if you must).</p>
<p>I mention it not to gloat about how much fun I had with my kids, but to bring up what I did the day before I departed. Again, call me old fashioned, but I typically get my books not from amazon, a bookstore or via an iPad, but from a more cost effective source: the public library. Quaint, I know.</p>
<p>When I go to the library, I can&#8217;t go without a plan. I can&#8217;t simply browse the stacks to find a good book. Yes, the library is well organized (good classifications!). And each book has good information on the cover describing the contents (standard metadata!) like author and title. But that information exterior to the contents just is not effective in helping me quickly determine the value of a book relative to my needs. I prepare in advance by reading reviews of others – other people who’ve read the books and analyzed their value. Otherwise finding a good couple of books for my vacation is an overwhelming and frustrating task.</p>
<p>The same idea – expending effort to analyze the long-form text inside content – applies to the content inside your organization. In previous postings I’ve discussed the <a href="http://unstructuredthoughts.wordpress.com/category/content-assessment/">value of content assessment</a> to your organization. And to execute content assessment you need to execute content analytics. Historic approaches to tackling the content assessment problem have focused on  metadata exterior to a document – the title, the author, the dates. This is much like trying to find a library book just by browsing the stacks. Determining what content is necessary to your organization – <a href="http://unstructuredthoughts.wordpress.com/2010/04/21/115/">what content is valuable, requires governance, is legally relevant </a>– is virtually impossible simply by examining data exterior to your content.</p>
<p>Content analytics provides your organization the ability to determine the value of your content by interrogating the interior of those documents. Metadata on the outside of a document is only part of the story. What concepts are covered in the document? Does this document concern itself with a customer? A business partner? Does this document concern itself with a particular business activity?</p>
<p>All of these questions are difficult to answer without examining the text in a document but given the volume of information in your organization, it’s difficult to actually make these assessments on a large scale basis.  In my next posting I’ll cover how content analytics can help to answer these valuation questions.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/118/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/118/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/118/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/118/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/118/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/118/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/118/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/118/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=118&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/05/07/you-need-content-analytics-to-determine-the-value-of-content/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>The Value of Content Assessment</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/04/21/115/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/04/21/115/#comments</comments>
		<pubDate>Wed, 21 Apr 2010 14:30:13 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Analytics]]></category>
		<category><![CDATA[Content Assessment]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=115</guid>
		<description><![CDATA[In my previous post, the first in my series on content assessment, I described the information landscape with respect to content. Organizations are facing ever increasing volume, velocity and variety of information. Understanding growing piles of uncontrolled content through content analytics has clear benefits to organizations of every size. Each organization – and the range [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=115&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>In my previous post, the first in my <a href="http://unstructuredthoughts.wordpress.com/category/content-assessment/">series on content assessment</a>, I described the information landscape with respect to content. Organizations are facing ever increasing volume, velocity and variety of information. Understanding growing piles of uncontrolled content through content analytics has clear benefits to organizations of every size. Each organization – and the range of stakeholders in those organizations – will benefit from engaging in content assessment. How? In three main ways:</p>
<p>1) There is value to all stakeholders in simply understanding content better through analytics. Dynamically analyzing silos of unmanaged, uncontrolled content via content analytics provides new insight about this information stakeholders previously did not have. Before, stakeholders simply knew the ‘speeds and feeds’ about a content repository: the number of documents, the size of those documents, etc. Content analytics now delivers insight about the content and that insight leads to better, more informed decision making. Which areas represent the most risk? Where should we start our governance efforts? Where should our priorities lie? What is the projected ROI of better information lifecycle governance?</p>
<p>Today, organizations make these kinds of decisions about their unstructured content repositories with limited data. More likely, they avoid making decisions because they lack this kind of insight. No longer. Improved understanding and insight about your unstructured information leads to better decisions about how to take action.</p>
<p>2) One such action to take is to decommission content, the systems that support that content and the systems that rely upon that content. Decommissioning is primarily an IT concern. They manage the costs of the information infrastructure. By default, most organizations have been doing nothing with their content. And as such their infrastructure costs have continued to rise. With an understanding of the content, you can take on these once-avoided decisions with more confidence. By understanding the content in a particular system, you can take action to shut those systems down and save costs.</p>
<p>3) There is a flip side to decommissioning old content and the systems that support content. It is that by understanding content, you will be empowered to preserve the necessary content. Preserving the necessary content enables the decommissioning you want to execute.</p>
<p><em>Content assessment provides you the ability to identify content that is valuable</em>. This makes general line of business users happy, as they are resistant to decommissioning because they don’t want you to throw away ‘something they’ll need’ in the future.</p>
<p><em>Content assessment provides you the tools to identify content that requires lifecycle governance</em>. The compliance officers and records managers will be happy because your organization’s obligations will be met in a documented process. You will be taking steps to enforce your content policies on disposition of content while still working to control your costs.</p>
<p><em>Content assessment provides you the tool to identify content that is legally relevant</em>. The lawyers will be happy because they can use it to find the information relevant to legal cases where it resides in uncontrolled environments – and exert the kind of control the eDiscovery process demands.</p>
<p>Three main ways content assessment delivers value to your organization: via understanding of your content on its own; via decommissioning and consequently reduction of IT cost; via preservation and governance for fulfilling the needs of line-of-business stakeholders and compliance minded stakeholders alike.</p>
<p>Next in the <a href="http://unstructuredthoughts.wordpress.com/category/content-assessment/">content assessment series</a> . . . what content is ‘necessary’ to your organization and how does content analytics help to make this determination?</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/115/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=115&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/04/21/115/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>My College Laundry Habits and Your Organization&#8217;s Content Habits</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/04/15/my-college-laundry-habits-and-your-organizations-content-habits/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/04/15/my-college-laundry-habits-and-your-organizations-content-habits/#comments</comments>
		<pubDate>Thu, 15 Apr 2010 20:57:36 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Analytics]]></category>
		<category><![CDATA[Content Assessment]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=111</guid>
		<description><![CDATA[First in a series of posts on content assessment. It has been quiet around this here blog. One reason was that the month of March saw two “once in 50 year” rain storms in the Boston area. I got to learn some valuable skills in flood prevention as a result – unfortunately, those lessons came [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=111&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>First in a series of posts on <em><a href="http://unstructuredthoughts.wordpress.com/category/content-assessment/">content assessment</a></em>.</p>
<p><a href="http://unstructuredthoughts.files.wordpress.com/2010/04/istock_000006409009xsmall.jpg"><img class="alignleft size-medium wp-image-113" title="Your Content?" src="http://unstructuredthoughts.files.wordpress.com/2010/04/istock_000006409009xsmall.jpg?w=300&#038;h=199" alt="Not to scale . . . my college laundry piles were *much* bigger" width="300" height="199" /></a>It has been quiet around this here blog. One reason was that the month of March saw two “once in 50 year” rain storms in the Boston area. I got to learn some <a href="http://tweetphoto.com/16455557">valuable skills</a> in flood prevention as a result – unfortunately, those lessons came at the cost of activities like blogging and tweeting . . . but I’m back and ready to roll with a series of posts on a topic I’ve been thinking and working on over the past 3 months – content assessment.</p>
<p>I <a href="https://www-950.ibm.com/blogs/icm/entry/content_assessment_get_insight_into_your_content_in_the_wild1?lang=en_us">introduced this topic</a> after our original <a href="http://www-03.ibm.com/press/us/en/pressrelease/28698.wss">announcement</a> for our <a href="http://www-01.ibm.com/software/data/content-management/assessment.html">content assessment</a> offering. And I’ve spent the last few months talking to IBM customers, analysts and other enterprise content professionals inside IBM. It’s an exciting application of content analytics technology to solve a class of problems that our customers have traditionally ignored . . . and hoped that it would go away &#8212; kind of like my laundry in college. Back then I kept on wearing my clean clothes day after day, hoping my laundry would magically wash itself. Not surprisingly, the clothes kept piling up. Finally, a random Sunday afternoon would arrive; I’d wake up, bite the bullet and wash my clothes. Ah  . . . to be 19 again . . . I digress.</p>
<p>Much as I continuously generated dirty clothes, organizations continue to generate content. And similar to the haphazard piles of laundry in my dorm room, these chaotic uncontrolled piles of content aren’t cleaning themselves up. And these piles of content are growing at a much faster pace.</p>
<p>In college, I’d wait until I couldn’t stand it anymore. And then I’d take action to take control of my clothing situation.  With the velocity, volume and variety of content growth, organizations are hitting a similar stage. They can’t maintain the same ‘do nothing, save everything’ practices about the content. The day has arrived to tackle those piles.</p>
<p>To IT, the costs are continuing to rise upwards (<a href="http://blogs.forrester.com/matthew_brown/10-03-31-search_information_governance_enterprise">17% of IT budgets are devoted to storage alone, up from 10% just a few years ago</a>). Records managers increasingly realize they can’t rely on users to identify and control business records. Legal needs to find the documents they need for eDiscovery proceedings – and fast.  Line of business users need better access and control of trusted content to better execute their business activities.</p>
<p>These information stakeholders need better control over the necessary information for their business. But to take action to exert that control they need a better understanding of their content landscape. They see the mounds of content, as far as their virtual eye can see. Years of bad content habits have created an intimidating problem that leaves them paralyzed as to how to solve it.</p>
<p>Content assessment solutions – powered by innovations in content analytics – are now ready to meet this challenge. Content assessment solutions deliver the kind of understanding organizations need to make decisions about their content. Empowered with insight about their content via content analytics, organizations can now take action. They can take action by decommissioning the content they no longer need. They can take action by decommissioning the systems and infrastructure that supports their unnecessary content. And they will be willing to take these cost cutting actions because they’ve identified and preserved the content that is necessary to their organization.</p>
<p>In the coming days and weeks, I’ll post more in this series of posts on content assessment – covering in more detail who benefits from content assessment, what those benefits are, and the key elements to a content assessment solution. It’s an exciting new solution area.</p>
<p>You can’t avoid grappling with the piles of content . . . just as I couldn’t avoid doing laundry.  If your content governance practices are analogous to my college laundry habits, content assessment is an idea you need to learn more about.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/111/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=111&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/04/15/my-college-laundry-habits-and-your-organizations-content-habits/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>

		<media:content url="http://unstructuredthoughts.files.wordpress.com/2010/04/istock_000006409009xsmall.jpg?w=300" medium="image">
			<media:title type="html">Your Content?</media:title>
		</media:content>
	</item>
		<item>
		<title>The Question I Hear Every Day &#8211; What is your Accuracy?</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/03/08/the-question-i-hear-every-day-what-is-your-accuracy/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/03/08/the-question-i-hear-every-day-what-is-your-accuracy/#comments</comments>
		<pubDate>Mon, 08 Mar 2010 20:11:17 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Classification]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=103</guid>
		<description><![CDATA[“What is the accuracy of your product” I’ve probably been asked that question in every presentation on content classification I’ve given, since I first started working on IBM&#8217;s classification product, over three years ago. I know two things when I’m asked the question: that the inquisitor wants a short answer and that the answer isn’t as [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=103&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>“What is the accuracy of your product”</p>
<p>I’ve probably been asked that question in every presentation on content classification I’ve given, since I first started working on IBM&#8217;s <a href="http://www.informationmanagementrequest.com/campaigns/compliance_warehouse/site/advancedClass.html?sor=">classification product</a>, over three years ago.</p>
<p>I know two things when I’m asked the question: that the inquisitor wants a short answer and that the answer isn’t as simple as the inquisitor expects.</p>
<p>The way the question is framed – the simple straightforward request for accuracy results – implies an underlying assumption that the proper categorization of content in a business scenario absolutely and definitively exists. I was reminded of this as I read a <a href="http://www3.interscience.wiley.com/cgi-bin/fulltext/122632117/PDFSTART">nice study</a> on the accuracy of document categorization, written by the <a href="http://www.ediscoveryinstitute.org/">eDiscovery Institute</a> and published this year. It stated:</p>
<blockquote><p>Ultimately, measurement of accuracy implies that we have some reliable ground truth or gold standard against which to compare the classifier, but such a standard is generally lacking for measure of information retrieval in general and for legal discovery in particular.</p></blockquote>
<p>The paper, <a href="http://www3.interscience.wiley.com/cgi-bin/fulltext/122632117/PDFSTART">Document Categorization in Legal Electronic Discovery: Computer Classification vs. Manual Review</a>, explores the accuracy of automated document classification, specifically in comparison to human based alternatives. In the paper, the authors explore whether automated classification is a reasonable alternative to use when categorizing documents in a legal discovery review.  The authors worked with a corpus of documents from a real regulatory inquiry.</p>
<p>The original lawyers involved in the case had categorized the documents. This is a ready-made training set from which the computer-based classifiers could learn and is exactly what the authors did. In turn, these well-trained classifiers categorized other content gathered for the case.</p>
<p>Yet, to assess the quality of their automated classification methods, the authors didn’t compare the automated results against the results of the original reviewers. Rather, they tasked an entirely new set of human reviewers (&#8220;re-reviewers&#8221;) to classify documents from the corpus.</p>
<p>The authors, for deriving their conclusions, compared the results of these re-reviewers with those of the automated classifiers. I think of this as a fair fight – comparing the results of the computers with the same task as executed by humans.</p>
<p>The human re-reviewers agreed with the original reviewers approximately 79.8% of the time.</p>
<p>Not exactly the kind of consistent accuracy we expect out of our reliable employees, is it?</p>
<p>Based on this level of disagreement, the authors have illustrated their assertion that there really can’t be a reliable ‘gold standard’ of truth in categorization of documents. The ‘right’ answer is not so easily identified in every case &#8211; in most cases, in fact.</p>
<p>By comparison, automated methods agreed with the original reviewers over 80% of the time.</p>
<p>So what did I learn from this paper?</p>
<p>1) The human reviewers aren’t perfect. The human re-reviewers aren’t perfect. And of course the automated replacements for the human analysis aren’t perfect. I tend to give human classifiers too much credit, in fact. No method is perfect.</p>
<p>But . . .</p>
<p>2) The fact that automated classification can do just as well as, if not slightly better than, the human re-reviewers leads the authors to conclude that “employing a system like one of the two systems employed in this task will yield results that are comparable to the traditional practice in discovery and would therefore appear to be reasonable.”</p>
<p>And that is the key – the software isn’t perfect. But neither are the motivated, knowledgeable humans. And the automated methods, though a bit more mysterious, give comparable results – at a fraction of the cost.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/103/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/103/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/103/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/103/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/103/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/103/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/103/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/103/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=103&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/03/08/the-question-i-hear-every-day-what-is-your-accuracy/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>Google&#8217;s Improvements Extend into Information Governance</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/02/23/googles-improvements-extend-into-information-governanc/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/02/23/googles-improvements-extend-into-information-governanc/#comments</comments>
		<pubDate>Tue, 23 Feb 2010 15:10:26 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Classification]]></category>
		<category><![CDATA[Master Content]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=98</guid>
		<description><![CDATA[I just read with great interest Steven Levy&#8217;s article in Wired on Google&#8217;s search algorithm and how Google works to improve it. A couple of things leaped out at me as concepts I&#8217;ve discussed here in the past (or on my old blog), as the concepts extend into the enterprise. Just as Google uses them [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=98&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I just read with great interest Steven Levy&#8217;s <a href="http://www.wired.com/magazine/2010/02/ff_google_algorithm/">article in Wired on Google&#8217;s search algorithm</a> and how Google works to improve it. A couple of things leaped out at me as concepts I&#8217;ve discussed here in the past (or on my old blog), as the concepts extend into the enterprise. Just as Google uses them to improve their consumer search experience, you can leverage them within the context of better information governance.</p>
<p>1) Google uses document context similar to how I have described <a href="https://www-950.ibm.com/blogs/icm/entry/take_in_your_surroundings_when_classifying_content?lang=en_us">advanced content classification as a &#8220;context-based&#8221; method</a> of classifying information.  Levy writes:</p>
<blockquote><p>Google’s synonym system understood that a dog was similar to a puppy and that boiling water was hot. But it also concluded that a hot dog was the same as a boiling puppy. The problem was fixed in late 2002 by a breakthrough based on philosopher Ludwig Wittgenstein’s <a href="http://plato.stanford.edu/entries/wittgenstein/">theories</a> about how words are defined by context. As Google crawled and archived billions of documents and Web pages, it analyzed what words were close to each other. “Hot dog” would be found in searches that also contained “bread” and “mustard” and “baseball games” — not poached pooches. That helped the algorithm understand what “hot dog” — and millions of other terms — meant. “Today, if you type ‘Gandhi bio,’ we know that bio means biography,” Singhal says. “And if you type ‘bio warfare,’ it means biological.”</p></blockquote>
<p>Google uses the context of the content it indexes to better understand the purpose and intent of a particular document and in turn the purpose and intent of your particular search query. Advanced content classification methods deliver better categorization results in a similar way &#8212; it is using the full context of the training documents provided to it to deliver better results.</p>
<p>2) When discussing &#8216;trusted content&#8217;, I used the example of how <a href="http://unstructuredthoughts.wordpress.com/2010/01/14/what-is-master-content/">Google trusts some sources over others</a>. At the time, I didn&#8217;t have a source for this assertion. Levy describes this in some detail in the article:</p>
<blockquote><p>That same year, an engineer named <a href="http://research.google.com/people/krishna/">Krishna Bharat</a>, figuring that links from recognized authorities should carry more weight, devised a powerful signal that confers extra credibility to references from experts’ sites. (It would become Google’s first patent.) The most recent major change, codenamed Caffeine, revamped the entire indexing system to make it even easier for engineers to add signals.</p></blockquote>
<p>Do read the entire article if you&#8217;re interested in these topics &#8212; given our universal reliance on Google as consumers, it&#8217;s certainly beneficial to be an educated consumer. And these concepts can extend into better proactive management of your enterprise content.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/98/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=98&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/02/23/googles-improvements-extend-into-information-governanc/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
		<item>
		<title>Better Content Classification Than My One Year Old</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/02/18/better-content-classification-than-my-one-year-old/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/02/18/better-content-classification-than-my-one-year-old/#comments</comments>
		<pubDate>Fri, 19 Feb 2010 02:40:57 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Classification]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=78</guid>
		<description><![CDATA[When I&#8217;m explaining advanced content classification to audiences and why these methods are more powerful and accurate, I frequently tell this story. It illustrates the value of advanced methods of classification over more rudimentary, rule-based approaches. I think audiences have found it helpful. I emphasize the fact that advanced methods gain their greater accuracy from taking [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=78&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<div id="attachment_96" class="wp-caption alignleft" style="width: 210px"><a href="http://unstructuredthoughts.files.wordpress.com/2010/02/duck1.jpg"><img class="size-medium wp-image-96" title=" My daughter: &quot;DUCK!&quot;" src="http://unstructuredthoughts.files.wordpress.com/2010/02/duck1.jpg?w=200&#038;h=300" alt="DUCK!" width="200" height="300" /></a><p class="wp-caption-text">courtesy freefoto.com</p></div>
<p>When I&#8217;m explaining advanced content classification to audiences and why these methods are more powerful and accurate, I frequently tell this story. It illustrates the value of advanced methods of classification over more rudimentary, rule-based approaches. I think audiences have found it helpful.</p>
<p>I emphasize the fact that advanced methods gain their greater accuracy from taking the &#8216;full context&#8217; of the long-form text into account. Advanced methods aren&#8217;t just taking one word, or two words into account &#8212; they&#8217;re taking into account hundreds and thousands of words, weighing each word&#8217;s significance and coming up with a cumulative, holistic assessment of the similarity of the text to each category. Innumerable factors are being taken into account.  Hundreds and thousands of factors are being weighed. By comparison, a simple keyword based rule for categorization isn&#8217;t taking innumerable factors into account. It&#8217;s just taking one.</p>
<p>The analogy I draw is the difference between an adult&#8217;s ability to categorize and my daughter&#8217;s. Let me explain.</p>
<p>My daughter is 18 months old. One of her very first words was &#8220;duck.&#8221; [I believe the order of new words was "Mama", "Duck", 15 other words, then "Dada." I digress]</p>
<p>In that first set of words, duck was pretty much alone relative to other animals. There might have been a &#8220;dog&#8221; in there, but that was about it when it came to naming things in the animal kingdom. Certainly it was the only bird she knew in the bird class of the animal taxonomy.</p>
<p>So when she saw a duck, she enthusiastically blurted out &#8220;DUCK!&#8221;</p>
<p>And when she saw a pigeon, she exclaimed &#8220;DUCK!&#8221;</p>
<p>And when she saw a hawk, she of course shouted out &#8220;DUCK!&#8221;</p>
<p>If it had wings and a beak, she named it a duck.</p>
<p>Why? Because her relatively immature mind was only taking a few factors into account. We as adults can say, &#8220;yes, it&#8217;s got a beak, but its beak is pretty sharp, and its feet aren&#8217;t webbed and . . . well therefore it&#8217;s a hawk, not a duck.&#8221;</p>
<p>My daughter was acting like a simple rules based classifier. She took one or two key factors into account and made her decision.</p>
<p>We worked on her and now she can distinguish between birds and ducks. She&#8217;s making progress. Her brain is constantly in &#8216;upgrade&#8217; mode. You should look into upgrading your classification methods too if you&#8217;re only focused on rules-based approaches.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/78/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/78/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/78/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/78/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/78/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/78/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/78/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/78/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=78&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/02/18/better-content-classification-than-my-one-year-old/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>

		<media:content url="http://unstructuredthoughts.files.wordpress.com/2010/02/duck1.jpg?w=200" medium="image">
			<media:title type="html"> My daughter: &#34;DUCK!&#34;</media:title>
		</media:content>
	</item>
		<item>
		<title>Mistakes Made By People are Forgivable</title>
		<link>http://unstructuredthoughts.wordpress.com/2010/02/10/mistakes-made-by-people-are-forgivable/</link>
		<comments>http://unstructuredthoughts.wordpress.com/2010/02/10/mistakes-made-by-people-are-forgivable/#comments</comments>
		<pubDate>Thu, 11 Feb 2010 02:52:44 +0000</pubDate>
		<dc:creator>Josh Payne</dc:creator>
				<category><![CDATA[Content Analytics]]></category>
		<category><![CDATA[Content Classification]]></category>

		<guid isPermaLink="false">http://unstructuredthoughts.wordpress.com/?p=84</guid>
		<description><![CDATA[In a previous post, I emphasized the importance of rigorous, controlled testing when assessing the potential of content analytics. This is especially important for content classification when it is being used to replace human decision-making.  My broader point  in that post was that when adopting new technology, you can&#8217;t rely on the qualitative perception of the skeptical [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=84&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://unstructuredthoughts.wordpress.com/2010/01/26/eyeballing-your-content-analytics-results/">In a previous post</a>, I emphasized the importance of rigorous, controlled testing when assessing the potential of content analytics. This is especially important for content classification when it is being used to replace human decision-making.  My broader point  in that post was that when adopting new technology, you can&#8217;t rely on the qualitative perception of the skeptical observer.</p>
<p>A similar topic, that of adoption of technology in the legal profession, came up at the keynote to LegalTech last week. <a href="http://www.law.com/jsp/article.jsp?id=1202441930493">Law.com recounts</a> <a href="http://en.wikipedia.org/wiki/Lisa_Sanders">Dr. Lisa Sanders&#8217; response</a>, which was far more eloquent than my post so I wanted to pass it along here:</p>
<blockquote><p>During the question-and-answer session, <a href="http://www.kelleydrye.com/home" target="new">Kelley Drye &amp; Warren</a> Practice Development Manager Jennifer Topper asked the panel how to convince litigators to use tools like technology and decision trees, repeatable processes that can help make handling similar cases more efficient.</p>
<p>It&#8217;s a long process of changing attitudes within a corporate culture, Dr. Sanders said. &#8220;Mistakes made by a computer or guideline live forever in the minds of people watching them. Mistakes made by people are forgivable.&#8221;</p></blockquote>
<p>I guess that&#8217;s why she writes for the New York Times . . .</p>
<p>(the LegalTech keynote has been quite the <a href="http://unstructuredthoughts.wordpress.com/2010/02/08/dehumanizing-human-analysis/">blogging gift this week</a>. Maybe I should volunteer to staff the IBM booth next year and get the scoop first hand)</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/unstructuredthoughts.wordpress.com/84/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/unstructuredthoughts.wordpress.com/84/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/unstructuredthoughts.wordpress.com/84/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/unstructuredthoughts.wordpress.com/84/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/unstructuredthoughts.wordpress.com/84/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/unstructuredthoughts.wordpress.com/84/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/unstructuredthoughts.wordpress.com/84/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/unstructuredthoughts.wordpress.com/84/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=unstructuredthoughts.wordpress.com&amp;blog=11142630&amp;post=84&amp;subd=unstructuredthoughts&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://unstructuredthoughts.wordpress.com/2010/02/10/mistakes-made-by-people-are-forgivable/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/07e691cb7db0378cc69fe43d15983b14?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">joshualpayne</media:title>
		</media:content>
	</item>
	</channel>
</rss>
