Maximum Subarray with SQL

Table of Contents

1 What

2 Why

3 How

3.1 Preamble

3.2 Recursive Common Table Expressions

3.3 Generating a Data Series

3.3.1 Positive Integers

drop view if exists positive_integer;
create view if not exists positive_integer as
with recursive cnt(x) as (
     select 1
     union all
     select x+1 from cnt
     limit 1000000)
select x from cnt;
.mode column
.headers on
select * from positive_integer limit 5;
x         
----------
1         
2         
3         
4         
5         

3.3.2 Indexed Integers

drop table if exists integer1;
create table if not exists integer1 as
select * from positive_integer;

drop index if exists integer1_idx1;
create index integer1_idx1 on integer1 (x);
.mode column
.headers on
select * from integer1 limit 5;
x         
----------
1         
2         
3         
4         
5         
drop view if exists integer2;
create view if not exists integer2 as
select rowid, x as i from integer1;
.mode column
.headers on
select * from integer2 limit 5;
rowid       i         
----------  ----------
1           1         
2           2         
3           3         
4           4         
5           5         

3.3.3 Non-monotonic Integers

drop view if exists integer3;
create view if not exists integer3 as
select
    rowid,
    case when i%4 > 1 then 1000000 + i else 1000000 - i end as v
from integer2;
.mode column
.headers on
select * from integer3 limit 5;
rowid       v         
----------  ----------
1           999999    
2           1000002   
3           1000003   
4           999996    
5           999995    

3.4 Maximum Subarray

drop view if exists integer4;
create view if not exists integer4 as
with recursive max_subarray (rowid, v, max_ending_here, max_so_far, startid, endid) as (
     select
        0,0,0,0,1,1
     union
     select
        i.rowid,
        i.v,
        max(i.v, ms.max_ending_here + i.v),
        max(ms.max_so_far, max(i.v, ms.max_ending_here + i.v)),
        case when ms.max_ending_here + i.v < i.v then i.rowid else ms.startid end,
        case when max(i.v, ms.max_ending_here + i.v) > ms.max_so_far then i.rowid else ms.endid end
     from
        integer3 i, max_subarray ms
     where
        i.rowid = ms.rowid + 1
     order by 2 asc)
select
     rowid, v, max_ending_here, max_so_far, startid, endid
from max_subarray;
.mode column
.headers on
select * from integer4 limit 5;
rowid       v           max_ending_here  max_so_far  startid     endid     
----------  ----------  ---------------  ----------  ----------  ----------
0           0           0                0           1           1         
1           999999      999999           999999      1           1         
2           1000002     2000001          2000001     1           2         
3           1000003     3000004          3000004     1           3         
4           999996      4000000          4000000     1           4         

3.5 Schema

.tables
integer1          integer4          stockdata2        stockdata5      
integer2          positive_integer  stockdata3      
integer3          stockdata1        stockdata4      

4 Applications

4.1 Stock Market Profits

cat googl.js | head -n23
{
    "Meta Data": {
        "1. Information": "Daily Prices (open, high, low, close) and Volumes",
        "2. Symbol": "GOOGL",
        "3. Last Refreshed": "2017-12-08",
        "4. Output Size": "Full size",
        "5. Time Zone": "US/Eastern"
    },
    "Time Series (Daily)": {
        "2017-12-08": {
            "1. open": "1051.8100",
            "2. high": "1056.4200",
            "3. low": "1045.8600",
            "4. close": "1049.3800",
            "5. volume": "1479665"
        },
        "2017-12-07": {
            "1. open": "1036.0700",
            "2. high": "1048.9200",
            "3. low": "1035.3600",
            "4. close": "1044.5700",
            "5. volume": "1437448"
        },
cat googl.js | jq -r '."Time Series (Daily)"' | head -n15
{
  "2017-12-08": {
    "1. open": "1051.8100",
    "2. high": "1056.4200",
    "3. low": "1045.8600",
    "4. close": "1049.3800",
    "5. volume": "1479665"
  },
  "2017-12-07": {
    "1. open": "1036.0700",
    "2. high": "1048.9200",
    "3. low": "1035.3600",
    "4. close": "1044.5700",
    "5. volume": "1437448"
  },
cat googl.js | jq -r '."Time Series (Daily)"|to_entries' | head -n21
[
  {
    "key": "2017-12-08",
    "value": {
      "1. open": "1051.8100",
      "2. high": "1056.4200",
      "3. low": "1045.8600",
      "4. close": "1049.3800",
      "5. volume": "1479665"
    }
  },
  {
    "key": "2017-12-07",
    "value": {
      "1. open": "1036.0700",
      "2. high": "1048.9200",
      "3. low": "1035.3600",
      "4. close": "1044.5700",
      "5. volume": "1437448"
    }
  },
cat googl.js | jq -r '."Time Series (Daily)"|to_entries[]' | head -n20
{
  "key": "2017-12-08",
  "value": {
    "1. open": "1051.8100",
    "2. high": "1056.4200",
    "3. low": "1045.8600",
    "4. close": "1049.3800",
    "5. volume": "1479665"
  }
}
{
  "key": "2017-12-07",
  "value": {
    "1. open": "1036.0700",
    "2. high": "1048.9200",
    "3. low": "1035.3600",
    "4. close": "1044.5700",
    "5. volume": "1437448"
  }
}
cat googl.js | jq -r '."Time Series (Daily)"|to_entries[]|[.key, .value."2. high"|split(".")|join("")]' | head -n12
[
  "2017-12-08",
  "10564200"
]
[
  "2017-12-07",
  "10489200"
]
[
  "2017-12-06",
  "10395800"
]
cat googl.js | jq -r '."Time Series (Daily)"|to_entries[]|[.key, .value."2. high"|split(".")|join("")]|@csv' | head -n5
"2017-12-08","10564200"
"2017-12-07","10489200"
"2017-12-06","10395800"
"2017-12-05","10366800"
"2017-12-04","10313400"
cat googl.js | jq -r '."Time Series (Daily)"|to_entries[]|[.key, .value."2. high"|split(".")|join("")]|@csv' > data.csv &
drop table if exists stockdata1;
create table if not exists stockdata1 (
date text,
high numeric);

.mode csv
.import data.csv stockdata1
.mode column
.headers on
select * from stockdata1 limit 5;
date        high      
----------  ----------
2017-12-08  10564200  
2017-12-07  10489200  
2017-12-06  10395800  
2017-12-05  10366800  
2017-12-04  10313400  
select * from stockdata1 limit 5;

stocks.svg

drop view if exists stockdata2;
drop table if exists stockdata2;
create table if not exists stockdata2 as
select * from stockdata1 order by date(date, "utc") asc;
.mode column
.headers on
select * from stockdata2 limit 5;
date        high      
----------  ----------
2004-08-19  1040600   
2004-08-20  1090800   
2004-08-23  1134800   
2004-08-24  1116000   
2004-08-25  1080000   
.mode column
.headers on
select * from stockdata2 limit 5;
date        high      
----------  ----------
2004-08-19  1040600   
2004-08-20  1090800   
2004-08-23  1134800   
2004-08-24  1116000   
2004-08-25  1080000   
drop view if exists stockdata3;
create view if not exists stockdata3 as
select rowid, high as v from stockdata2;
.mode column
.headers on
select * from stockdata3 limit 5;
rowid       v         
----------  ----------
1           1040600   
2           1090800   
3           1134800   
4           1116000   
5           1080000   
drop view if exists stockdata4;
create view if not exists stockdata4 as
with recursive max_subarray (rowid, v, max_ending_here, max_so_far, startid, endid) as (
     select
        0,0,0,0,1,1
     union
     select
        i.rowid,
        i.v,
        max(i.v, ms.max_ending_here + i.v),
        max(ms.max_so_far, max(i.v, ms.max_ending_here + i.v)),
        case when ms.max_ending_here + i.v < i.v then i.rowid else ms.startid end,
        case when max(i.v, ms.max_ending_here + i.v) > ms.max_so_far then i.rowid else ms.endid end
     from
        stockdata3 i, max_subarray ms
     where
        i.rowid = ms.rowid + 1
     order by 2 asc)
select
     rowid, v, max_ending_here, max_so_far, startid, endid
from max_subarray;
.mode column
.headers on
select * from stockdata4 limit 5;
rowid       v           max_ending_here  max_so_far  startid     endid     
----------  ----------  ---------------  ----------  ----------  ----------
0           0           0                0           1           1         
1           1040600     1040600          1040600     1           1         
2           1090800     2131400          2131400     1           2         
3           1134800     3266200          3266200     1           3         
4           1116000     4382200          4382200     1           4         
drop view if exists stockdata5;
create view if not exists stockdata5 as
select 
  quote.date as date, 
  quote.high as high, 
  start.date as buy, 
  end.date as sell
from stockdata4 s4
inner join stockdata2 quote on quote.rowid = s4.rowid
inner join stockdata2 start on start.rowid = s4.startid
inner join stockdata2 end on end.rowid = s4.endid;
.mode column
.headers on
select * from stockdata5 limit 5;
date        high        buy         sell      
----------  ----------  ----------  ----------
2004-08-19  1040600     2004-08-19  2004-08-19
2004-08-20  1090800     2004-08-19  2004-08-20
2004-08-23  1134800     2004-08-19  2004-08-23
2004-08-24  1116000     2004-08-19  2004-08-24
2004-08-25  1080000     2004-08-19  2004-08-25
select * from stockdata2 where rowid = 0;
-- drop view if exists quote_data_in_order;
-- create view if not exists quote_data_in_order as
-- select oid, * from quote_data
-- where date(quote_date, "utc") > date("2017-01-01", "utc")
-- order by date(quote_date, "utc") asc;

5 Summary