fix cache queries for non core2 CPU ;)
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 5bad8e3..93214d3 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -592,73 +592,87 @@
 #    define EIGEN_CPUID(abcd,func) \
        __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func));
 #    else
-#    define EIGEN_CPUID(abcd,func) \
-       __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func) );
+#    define EIGEN_CPUID(abcd,func,id) \
+       __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
 #    endif
 #elif defined(_MSC_VER)
 #    define EIGEN_CPUID(abcd,func) __cpuid((int*)abcd,func)
 #endif
 
 /** \internal
- * \returns the size in Bytes of the L1 data cache */
-inline std::ptrdiff_t ei_queryL1CacheSize()
+ * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
+inline void ei_queryCacheSizes(int& l1, int& l2, int& l3)
 {
   #ifdef EIGEN_CPUID
   int abcd[4];
-
-  // try the direct method using extended level
-  EIGEN_CPUID(abcd,0x80000005);
-  std::ptrdiff_t l1 = std::ptrdiff_t(abcd[2] >> 24) * 1024;
-
-  if(l1>0)
-    return l1*1024;
-
-  // it fails, try using the standard level
-  EIGEN_CPUID(abcd,0x00000002);
-  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
-  for(int i=0; i<14; ++i)
+  
+  const char GenuineIntel_char[] = "GenuntelineI";
+  const int* GenuineIntel = (int*)GenuineIntel_char;
+  
+  const char AuthenticAMD_char[] = "AuthcAMDenti";
+  const int* AuthenticAMD = (int*)AuthenticAMD_char;
+  
+  // Step 1: identify the CPU model
+  EIGEN_CPUID(abcd,0x0,0);
+  if(abcd[1]==GenuineIntel[0] && abcd[2]==GenuineIntel[1] && abcd[3]==GenuineIntel[2])
   {
-    switch(bytes[i])
-    {
-      case 0x0A: l1 = 8; break;   // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
-      case 0x0C: l1 = 16; break;  // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
-      case 0x0E: l1 = 24; break;  // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
-      case 0x10: l1 = 16; break;  // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
-      case 0x15: l1 = 16; break;  // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
-      case 0x2C: l1 = 32; break;  // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
-      case 0x30: l1 = 32; break;  // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
-// 56h   L0 data TLB, 4M pages, 4 ways, 16 entries
-// 57h   L0 data TLB, 4K pages, 4 ways, 16 entries
-// 59h   L0 data TLB, 4K pages, fully, 16 entries
-      case 0x60: l1 = 16; break;  // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
-      case 0x66: l1 = 8; break;   // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
-      case 0x67: l1 = 16; break;  // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
-      case 0x68: l1 = 32; break;  // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
-// 77h   code L1 cache, 16 KB, 4 ways, 64 byte lines, sectored (IA-64)
-// 96h   data L1 TLB, 4K...256M pages, fully, 32 entries (IA-64)
-      default: break;
-    }
+    // use Intel's cpuid API
+    l1 = l2 = l3 = 0;
+    int cache_id = 0;
+    int cache_type = 0;
+    do {
+      EIGEN_CPUID(abcd,0x4,cache_id);
+      cache_type  = (abcd[0] & 0x0F) >> 0;
+      if(cache_type==1||cache_type==3) // data or unified cache
+      {
+        int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]
+        int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
+        int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
+        int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
+        int sets        = (abcd[2]);                    // C[31:0]
+        
+        int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+        
+        switch(cache_level)
+        {
+          case 1: l1 = cache_size; break;
+          case 2: l2 = cache_size; break;
+          case 3: l3 = cache_size; break;
+          default: break;
+        }
+      }
+      cache_id++;
+    } while(cache_type>0);
   }
-
-  return l1*1024;
-  #else
-  return -1;
+  else if(abcd[1]==AuthenticAMD[0] && abcd[2]==AuthenticAMD[1] && abcd[3]==AuthenticAMD[2])
+  {
+    // use AMD's cpuid API
+    EIGEN_CPUID(abcd,0x80000005,0);
+    l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
+    EIGEN_CPUID(abcd,0x80000006,0);
+    l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
+    l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
+  }
+  // TODO support other vendors
   #endif
 }
 
 /** \internal
- * \returns the size in Bytes of the L2 or L3 cache if this later is present */
-inline std::ptrdiff_t ei_queryTopLevelCacheSize()
+ * \returns the size in Bytes of the L1 data cache */
+inline int ei_queryL1CacheSize()
 {
-  #ifdef EIGEN_CPUID
-  int abcd[4];
-  EIGEN_CPUID(abcd,0x80000006);
-  std::ptrdiff_t l2 = std::ptrdiff_t(abcd[2] >> 16) * 1024;
-  std::ptrdiff_t l3 = std::ptrdiff_t((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;
+  int l1(-1), l2, l3;
+  ei_queryCacheSizes(l1,l2,l3);
+  return l1;
+}
+
+/** \internal
+ * \returns the size in Bytes of the L2 or L3 cache if this later is present */
+inline int ei_queryTopLevelCacheSize()
+{
+  int l1, l2(-1), l3(-1);
+  ei_queryCacheSizes(l1,l2,l3);
   return std::max(l2,l3);
-  #else
-  return -1;
-  #endif
 }
 
 #endif // EIGEN_MEMORY_H